void
newScanKey(IndexScanDesc scan)
{
    ScanKey     scankey = scan->keyData;
    GinScanOpaque so = (GinScanOpaque) scan->opaque;
    int         i;
    uint32      nkeys = 0;

    so->keys = (GinScanKey) palloc(scan->numberOfKeys * sizeof(GinScanKeyData));

    if (scan->numberOfKeys < 1)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("GIN indexes do not support whole-index scans")));

    for (i = 0; i < scan->numberOfKeys; i++)
    {
        Datum      *entryValues;
        uint32      nEntryValues;

        if (scankey[i].sk_flags & SK_ISNULL)
            elog(ERROR, "Gin doesn't support NULL as scan key");
        Assert(scankey[i].sk_attno == 1);

        entryValues = (Datum *)
            DatumGetPointer(FunctionCall3(&so->ginstate.extractQueryFn,
                                          scankey[i].sk_argument,
                                          PointerGetDatum(&nEntryValues),
                                          UInt16GetDatum(scankey[i].sk_strategy)));

        if (entryValues == NULL || nEntryValues == 0)
            /* full scan... */
            continue;

        fillScanKey(&so->ginstate, &(so->keys[nkeys]),
                    scankey[i].sk_argument,
                    entryValues, nEntryValues, scankey[i].sk_strategy);
        nkeys++;
    }

    so->nkeys = nkeys;
    if (so->nkeys == 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("GIN index does not support search with void query")));

    pgstat_count_index_scan(&scan->xs_pgstat_info);
}
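/*
 * For context, a minimal sketch of the three-argument extractQuery support
 * function that newScanKey() above invokes via FunctionCall3().  Only the
 * calling convention (query value in, entry count out, strategy number in)
 * is taken from the call site; the opclass and the
 * my_split_query_into_entries() helper are hypothetical.
 */
Datum
my_extract_query(PG_FUNCTION_ARGS)
{
    Datum       query = PG_GETARG_DATUM(0);
    uint32     *nentries = (uint32 *) PG_GETARG_POINTER(1);
    StrategyNumber strategy = PG_GETARG_UINT16(2);
    Datum      *entries;

    /* hypothetical helper: decompose the query into indexable entries */
    entries = my_split_query_into_entries(query, strategy, nentries);

    /*
     * Returning no entries makes the caller treat this key as requiring a
     * full scan (the "full scan..." branch above skips it).
     */
    PG_RETURN_POINTER(entries);
}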
/*
 * gistgetbitmap() -- Get a bitmap of all heap tuple locations
 */
Datum
gistgetbitmap(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    Node       *n = (Node *) PG_GETARG_POINTER(1);
    TIDBitmap  *tbm;
    GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
    int64       ntids = 0;
    GISTSearchItem fakeItem;

    if (n == NULL)
        tbm = tbm_create(work_mem * 1024L);
    else if (!IsA(n, TIDBitmap))
        elog(ERROR, "non hash bitmap");
    else
        tbm = (TIDBitmap *) n;

    if (!so->qual_ok)
        PG_RETURN_POINTER(tbm);

    pgstat_count_index_scan(scan->indexRelation);

    /* Begin the scan by processing the root page */
    so->curTreeItem = NULL;
    so->curPageData = so->nPageData = 0;

    fakeItem.blkno = GIST_ROOT_BLKNO;
    memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
    gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);

    /*
     * While scanning a leaf page, ItemPointers of matching heap tuples will
     * be stored directly into tbm, so we don't need to deal with them here.
     */
    for (;;)
    {
        GISTSearchItem *item = getNextGISTSearchItem(so);

        if (!item)
            break;

        CHECK_FOR_INTERRUPTS();

        gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids);

        pfree(item);
    }

    PG_RETURN_POINTER(tbm);
}
/*
 * gistgetbitmap() -- Get a bitmap of all heap tuple locations
 */
int64
gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
    GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
    int64       ntids = 0;
    GISTSearchItem fakeItem;

    if (!so->qual_ok)
        return 0;

    pgstat_count_index_scan(scan->indexRelation);

    /* Begin the scan by processing the root page */
    so->curPageData = so->nPageData = 0;
    scan->xs_hitup = NULL;
    if (so->pageDataCxt)
        MemoryContextReset(so->pageDataCxt);

    fakeItem.blkno = GIST_ROOT_BLKNO;
    memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
    gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);

    /*
     * While scanning a leaf page, ItemPointers of matching heap tuples will
     * be stored directly into tbm, so we don't need to deal with them here.
     */
    for (;;)
    {
        GISTSearchItem *item = getNextGISTSearchItem(so);

        if (!item)
            break;

        CHECK_FOR_INTERRUPTS();

        gistScanPage(scan, item, item->distances, tbm, &ntids);

        pfree(item);
    }

    return ntids;
}
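/*
 * Caller-side sketch, assuming this era's executor-facing wrapper: the
 * bitmap variant above is driven through index_getbitmap(), with the bitmap
 * allocated by the caller (tbm_create() takes the allowed memory in bytes).
 * The function name here is illustrative.
 */
static int64
drive_bitmap_scan_sketch(IndexScanDesc scan)
{
    TIDBitmap  *tbm = tbm_create(work_mem * 1024L);
    int64       ntids = index_getbitmap(scan, tbm);

    /* tbm now holds the matching pages/offsets; ntids is the match count */
    return ntids;
}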
/*
 *	_bt_first() -- Find the first item in a scan.
 *
 * We need to be clever about the direction of scan, the search
 * conditions, and the tree ordering.  We find the first item (or,
 * if backwards scan, the last item) in the tree that satisfies the
 * qualifications in the scan key.  On success exit, the page containing
 * the current index tuple is pinned but not locked, and data about
 * the matching tuple(s) on the page has been loaded into so->currPos.
 * scan->xs_ctup.t_self is set to the heap TID of the current tuple,
 * and if requested, scan->xs_itup points to a copy of the index tuple.
 *
 * If there are no matching items in the index, we return FALSE, with no
 * pins or locks held.
 *
 * Note that scan->keyData[], and the so->keyData[] scankey built from it,
 * are both search-type scankeys (see nbtree/README for more about this).
 * Within this routine, we build a temporary insertion-type scankey to use
 * in locating the scan start position.
 */
bool
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    Buffer      buf;
    BTStack     stack;
    OffsetNumber offnum;
    StrategyNumber strat;
    bool        nextkey;
    bool        goback;
    ScanKey     startKeys[INDEX_MAX_KEYS];
    ScanKeyData scankeys[INDEX_MAX_KEYS];
    ScanKeyData notnullkeys[INDEX_MAX_KEYS];
    int         keysCount = 0;
    int         i;
    StrategyNumber strat_total;
    BTScanPosItem *currItem;

    pgstat_count_index_scan(rel);

    /*
     * Examine the scan keys and eliminate any redundant keys; also mark the
     * keys that must be matched to continue the scan.
     */
    _bt_preprocess_keys(scan);

    /*
     * Quit now if _bt_preprocess_keys() discovered that the scan keys can
     * never be satisfied (eg, x == 1 AND x > 2).
     */
    if (!so->qual_ok)
        return false;

    /*----------
     * Examine the scan keys to discover where we need to start the scan.
     *
     * We want to identify the keys that can be used as starting boundaries;
     * these are =, >, or >= keys for a forward scan or =, <, <= keys for
     * a backwards scan.  We can use keys for multiple attributes so long as
     * the prior attributes had only =, >= (resp. =, <=) keys.  Once we accept
     * a > or < boundary or find an attribute with no boundary (which can be
     * thought of as the same as "> -infinity"), we can't use keys for any
     * attributes to its right, because it would break our simplistic notion
     * of what initial positioning strategy to use.
     *
     * When the scan keys include cross-type operators, _bt_preprocess_keys
     * may not be able to eliminate redundant keys; in such cases we will
     * arbitrarily pick a usable one for each attribute.  This is correct
     * but possibly not optimal behavior.  (For example, with keys like
     * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
     * x=5 would be more efficient.)  Since the situation only arises given
     * a poorly-worded query plus an incomplete opfamily, live with it.
     *
     * When both equality and inequality keys appear for a single attribute
     * (again, only possible when cross-type operators appear), we *must*
     * select one of the equality keys for the starting point, because
     * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
     * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
     * start at x=4, we will fail and stop before reaching x=10.  If multiple
     * equality quals survive preprocessing, however, it doesn't matter which
     * one we use --- by definition, they are either redundant or
     * contradictory.
     *
     * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier.
     * If the index stores nulls at the end of the index we'll be starting
     * from, and we have no boundary key for the column (which means the key
     * we deduced NOT NULL from is an inequality key that constrains the other
     * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to
     * use as a boundary key.  If we didn't do this, we might find ourselves
     * traversing a lot of null entries at the start of the scan.
     *
     * In this loop, row-comparison keys are treated the same as keys on their
     * first (leftmost) columns.  We'll add on lower-order columns of the row
     * comparison below, if possible.
     *
     * The selected scan keys (at most one per index column) are remembered by
     * storing their addresses into the local startKeys[] array.
     *----------
     */
    strat_total = BTEqualStrategyNumber;
    if (so->numberOfKeys > 0)
    {
        AttrNumber  curattr;
        ScanKey     chosen;
        ScanKey     impliesNN;
        ScanKey     cur;

        /*
         * chosen is the so-far-chosen key for the current attribute, if any.
         * We don't cast the decision in stone until we reach keys for the
         * next attribute.
         */
        curattr = 1;
        chosen = NULL;
        /* Also remember any scankey that implies a NOT NULL constraint */
        impliesNN = NULL;

        /*
         * Loop iterates from 0 to numberOfKeys inclusive; we use the last
         * pass to handle after-last-key processing.  Actual exit from the
         * loop is at one of the "break" statements below.
         */
        for (cur = so->keyData, i = 0;; cur++, i++)
        {
            if (i >= so->numberOfKeys || cur->sk_attno != curattr)
            {
                /*
                 * Done looking at keys for curattr.  If we didn't find a
                 * usable boundary key, see if we can deduce a NOT NULL key.
                 */
                if (chosen == NULL && impliesNN != NULL &&
                    ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
                     ScanDirectionIsForward(dir) :
                     ScanDirectionIsBackward(dir)))
                {
                    /* Yes, so build the key in notnullkeys[keysCount] */
                    chosen = &notnullkeys[keysCount];
                    ScanKeyEntryInitialize(chosen,
                                           (SK_SEARCHNOTNULL | SK_ISNULL |
                                            (impliesNN->sk_flags &
                                             (SK_BT_DESC | SK_BT_NULLS_FIRST))),
                                           curattr,
                                           ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
                                            BTGreaterStrategyNumber :
                                            BTLessStrategyNumber),
                                           InvalidOid,
                                           InvalidOid,
                                           InvalidOid,
                                           (Datum) 0);
                }

                /*
                 * If we still didn't find a usable boundary key, quit; else
                 * save the boundary key pointer in startKeys.
                 */
                if (chosen == NULL)
                    break;
                startKeys[keysCount++] = chosen;

                /*
                 * Adjust strat_total, and quit if we have stored a > or <
                 * key.
                 */
                strat = chosen->sk_strategy;
                if (strat != BTEqualStrategyNumber)
                {
                    strat_total = strat;
                    if (strat == BTGreaterStrategyNumber ||
                        strat == BTLessStrategyNumber)
                        break;
                }

                /*
                 * Done if that was the last attribute, or if next key is not
                 * in sequence (implying no boundary key is available for the
                 * next attribute).
                 */
                if (i >= so->numberOfKeys ||
                    cur->sk_attno != curattr + 1)
                    break;

                /*
                 * Reset for next attr.
                 */
                curattr = cur->sk_attno;
                chosen = NULL;
                impliesNN = NULL;
            }

            /*
             * Can we use this key as a starting boundary for this attr?
             *
             * If not, does it imply a NOT NULL constraint?  (Because
             * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
             * *any* inequality key works for that; we need not test.)
             */
            switch (cur->sk_strategy)
            {
                case BTLessStrategyNumber:
                case BTLessEqualStrategyNumber:
                    if (chosen == NULL)
                    {
                        if (ScanDirectionIsBackward(dir))
                            chosen = cur;
                        else
                            impliesNN = cur;
                    }
                    break;
                case BTEqualStrategyNumber:
                    /* override any non-equality choice */
                    chosen = cur;
                    break;
                case BTGreaterEqualStrategyNumber:
                case BTGreaterStrategyNumber:
                    if (chosen == NULL)
                    {
                        if (ScanDirectionIsForward(dir))
                            chosen = cur;
                        else
                            impliesNN = cur;
                    }
                    break;
            }
        }
    }

    /*
     * If we found no usable boundary keys, we have to start from one end of
     * the tree.  Walk down that edge to the first or last key, and scan from
     * there.
     */
    if (keysCount == 0)
        return _bt_endpoint(scan, dir);

    /*
     * We want to start the scan somewhere within the index.  Set up an
     * insertion scankey we can use to search for the boundary point we
     * identified above.  The insertion scankey is built in the local
     * scankeys[] array, using the keys identified by startKeys[].
     */
    Assert(keysCount <= INDEX_MAX_KEYS);
    for (i = 0; i < keysCount; i++)
    {
        ScanKey     cur = startKeys[i];

        Assert(cur->sk_attno == i + 1);

        if (cur->sk_flags & SK_ROW_HEADER)
        {
            /*
             * Row comparison header: look to the first row member instead.
             *
             * The member scankeys are already in insertion format (ie, they
             * have sk_func = 3-way-comparison function), but we have to watch
             * out for nulls, which _bt_preprocess_keys didn't check. A null
             * in the first row member makes the condition unmatchable, just
             * like qual_ok = false.
             */
            ScanKey     subkey = (ScanKey) DatumGetPointer(cur->sk_argument);

            Assert(subkey->sk_flags & SK_ROW_MEMBER);
            if (subkey->sk_flags & SK_ISNULL)
                return false;
            memcpy(scankeys + i, subkey, sizeof(ScanKeyData));

            /*
             * If the row comparison is the last positioning key we accepted,
             * try to add additional keys from the lower-order row members.
             * (If we accepted independent conditions on additional index
             * columns, we use those instead --- doesn't seem worth trying to
             * determine which is more restrictive.)  Note that this is OK
             * even if the row comparison is of ">" or "<" type, because the
             * condition applied to all but the last row member is effectively
             * ">=" or "<=", and so the extra keys don't break the positioning
             * scheme.  But, by the same token, if we aren't able to use all
             * the row members, then the part of the row comparison that we
             * did use has to be treated as just a ">=" or "<=" condition, and
             * so we'd better adjust strat_total accordingly.
             */
            if (i == keysCount - 1)
            {
                bool        used_all_subkeys = false;

                Assert(!(subkey->sk_flags & SK_ROW_END));
                for (;;)
                {
                    subkey++;
                    Assert(subkey->sk_flags & SK_ROW_MEMBER);
                    if (subkey->sk_attno != keysCount + 1)
                        break;  /* out-of-sequence, can't use it */
                    if (subkey->sk_strategy != cur->sk_strategy)
                        break;  /* wrong direction, can't use it */
                    if (subkey->sk_flags & SK_ISNULL)
                        break;  /* can't use null keys */
                    Assert(keysCount < INDEX_MAX_KEYS);
                    memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
                    keysCount++;
                    if (subkey->sk_flags & SK_ROW_END)
                    {
                        used_all_subkeys = true;
                        break;
                    }
                }
                if (!used_all_subkeys)
                {
                    switch (strat_total)
                    {
                        case BTLessStrategyNumber:
                            strat_total = BTLessEqualStrategyNumber;
                            break;
                        case BTGreaterStrategyNumber:
                            strat_total = BTGreaterEqualStrategyNumber;
                            break;
                    }
                }
                break;          /* done with outer loop */
            }
        }
        else
        {
            /*
             * Ordinary comparison key.  Transform the search-style scan key
             * to an insertion scan key by replacing the sk_func with the
             * appropriate btree comparison function.
             *
             * If scankey operator is not a cross-type comparison, we can use
             * the cached comparison function; otherwise gotta look it up in
             * the catalogs.  (That can't lead to infinite recursion, since no
             * indexscan initiated by syscache lookup will use cross-data-type
             * operators.)
             *
             * We support the convention that sk_subtype == InvalidOid means
             * the opclass input type; this is a hack to simplify life for
             * ScanKeyInit().
             */
            if (cur->sk_subtype == rel->rd_opcintype[i] ||
                cur->sk_subtype == InvalidOid)
            {
                FmgrInfo   *procinfo;

                procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
                ScanKeyEntryInitializeWithInfo(scankeys + i,
                                               cur->sk_flags,
                                               cur->sk_attno,
                                               InvalidStrategy,
                                               cur->sk_subtype,
                                               cur->sk_collation,
                                               procinfo,
                                               cur->sk_argument);
            }
            else
            {
                RegProcedure cmp_proc;

                cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
                                             rel->rd_opcintype[i],
                                             cur->sk_subtype,
                                             BTORDER_PROC);
                if (!RegProcedureIsValid(cmp_proc))
                    elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
                         BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
                         cur->sk_attno, RelationGetRelationName(rel));
                ScanKeyEntryInitialize(scankeys + i,
                                       cur->sk_flags,
                                       cur->sk_attno,
                                       InvalidStrategy,
                                       cur->sk_subtype,
                                       cur->sk_collation,
                                       cmp_proc,
                                       cur->sk_argument);
            }
        }
    }

    /*----------
     * Examine the selected initial-positioning strategy to determine exactly
     * where we need to start the scan, and set flag variables to control the
     * code below.
     *
     * If nextkey = false, _bt_search and _bt_binsrch will locate the first
     * item >= scan key.  If nextkey = true, they will locate the first
     * item > scan key.
     *
     * If goback = true, we will then step back one item, while if
     * goback = false, we will start the scan on the located item.
     *----------
     */
    switch (strat_total)
    {
        case BTLessStrategyNumber:

            /*
             * Find first item >= scankey, then back up one to arrive at last
             * item < scankey.  (Note: this positioning strategy is only used
             * for a backward scan, so that is always the correct starting
             * position.)
             */
            nextkey = false;
            goback = true;
            break;

        case BTLessEqualStrategyNumber:

            /*
             * Find first item > scankey, then back up one to arrive at last
             * item <= scankey.  (Note: this positioning strategy is only used
             * for a backward scan, so that is always the correct starting
             * position.)
             */
            nextkey = true;
            goback = true;
            break;

        case BTEqualStrategyNumber:

            /*
             * If a backward scan was specified, need to start with last equal
             * item not first one.
             */
            if (ScanDirectionIsBackward(dir))
            {
                /*
                 * This is the same as the <= strategy.  We will check at the
                 * end whether the found item is actually =.
                 */
                nextkey = true;
                goback = true;
            }
            else
            {
                /*
                 * This is the same as the >= strategy.  We will check at the
                 * end whether the found item is actually =.
                 */
                nextkey = false;
                goback = false;
            }
            break;

        case BTGreaterEqualStrategyNumber:

            /*
             * Find first item >= scankey.  (This is only used for forward
             * scans.)
             */
            nextkey = false;
            goback = false;
            break;

        case BTGreaterStrategyNumber:

            /*
             * Find first item > scankey.  (This is only used for forward
             * scans.)
             */
            nextkey = true;
            goback = false;
            break;

        default:
            /* can't get here, but keep compiler quiet */
            elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
            return false;
    }

    /*
     * Use the manufactured insertion scan key to descend the tree and
     * position ourselves on the target leaf page.
     */
    stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ);

    /* don't need to keep the stack around... */
    _bt_freestack(stack);

    /* remember which buffer we have pinned, if any */
    so->currPos.buf = buf;

    if (!BufferIsValid(buf))
    {
        /*
         * We only get here if the index is completely empty.  Lock relation
         * because nothing finer to lock exists.
         */
        PredicateLockRelation(rel, scan->xs_snapshot);
        return false;
    }
    else
        PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);

    /* initialize moreLeft/moreRight appropriately for scan direction */
    if (ScanDirectionIsForward(dir))
    {
        so->currPos.moreLeft = false;
        so->currPos.moreRight = true;
    }
    else
    {
        so->currPos.moreLeft = true;
        so->currPos.moreRight = false;
    }
    so->numKilled = 0;          /* just paranoia */
    so->markItemIndex = -1;     /* ditto */

    /* position to the precise item on the page */
    offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);

    /*
     * If nextkey = false, we are positioned at the first item >= scan key, or
     * possibly at the end of a page on which all the existing items are less
     * than the scan key and we know that everything on later pages is greater
     * than or equal to scan key.
     *
     * If nextkey = true, we are positioned at the first item > scan key, or
     * possibly at the end of a page on which all the existing items are less
     * than or equal to the scan key and we know that everything on later
     * pages is greater than scan key.
     *
     * The actually desired starting point is either this item or the prior
     * one, or in the end-of-page case it's the first item on the next page or
     * the last item on this page.  Adjust the starting offset if needed.  (If
     * this results in an offset before the first item or after the last one,
     * _bt_readpage will report no items found, and then we'll step to the
     * next page as needed.)
     */
    if (goback)
        offnum = OffsetNumberPrev(offnum);

    /*
     * Now load data from the first page of the scan.
     */
    if (!_bt_readpage(scan, dir, offnum))
    {
        /*
         * There's no actually-matching data on this page.  Try to advance to
         * the next page.  Return false if there's no matching data at all.
         */
        if (!_bt_steppage(scan, dir))
            return false;
    }

    /* Drop the lock, but not pin, on the current page */
    LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);

    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
    scan->xs_ctup.t_self = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);

    return true;
}
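/*
 * For context, a minimal caller-side sketch (not part of _bt_first) of the
 * search-style scan key that arrives in scan->keyData[]; the attribute
 * number, comparison procedure, and constant are illustrative.
 */
static void
build_search_key_sketch(ScanKey skey)
{
    ScanKeyInit(skey,
                1,                      /* first index column */
                BTEqualStrategyNumber,  /* "=" qual */
                F_INT4EQ,               /* int4 equality procedure */
                Int32GetDatum(42));     /* constant to compare against */
}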
/*
 *	_hash_first() -- Find the first item in a scan.
 *
 *		Find the first item in the index that
 *		satisfies the qualification associated with the scan descriptor.  On
 *		success, the page containing the current index tuple is read locked
 *		and pinned, and the scan's opaque data entry is updated to
 *		include the buffer.
 */
bool
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    uint32      hashkey;
    Bucket      bucket;
    BlockNumber blkno;
    Buffer      buf;
    Buffer      metabuf;
    Page        page;
    HashPageOpaque opaque;
    HashMetaPage metap;
    IndexTuple  itup;
    ItemPointer current;
    OffsetNumber offnum;

    MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

    pgstat_count_index_scan(rel);

    current = &(scan->currentItemData);
    ItemPointerSetInvalid(current);

    /*
     * We do not support hash scans with no index qualification, because we
     * would have to read the whole index rather than just one bucket.  That
     * creates a whole raft of problems, since we haven't got a practical way
     * to lock all the buckets against splits or compactions.
     */
    if (scan->numberOfKeys < 1)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("hash indexes do not support whole-index scans")));

    /*
     * If the constant in the index qual is NULL, assume it cannot match any
     * items in the index.
     */
    if (scan->keyData[0].sk_flags & SK_ISNULL)
        return false;

    /*
     * Okay to compute the hash key.  We want to do this before acquiring any
     * locks, in case a user-defined hash function happens to be slow.
     */
    hashkey = _hash_datum2hashkey(rel, scan->keyData[0].sk_argument);

    /*
     * Acquire shared split lock so we can compute the target bucket safely
     * (see README).
     */
    _hash_getlock(rel, 0, HASH_SHARE);

    /* Read the metapage */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = (HashMetaPage) BufferGetPage(metabuf);

    /*
     * Compute the target bucket number, and convert to block number.
     */
    bucket = _hash_hashkey2bucket(hashkey,
                                  metap->hashm_maxbucket,
                                  metap->hashm_highmask,
                                  metap->hashm_lowmask);
    blkno = BUCKET_TO_BLKNO(metap, bucket);

    /* done with the metapage */
    _hash_relbuf(rel, metabuf);

    /*
     * Acquire share lock on target bucket; then we can release split lock.
     */
    _hash_getlock(rel, blkno, HASH_SHARE);
    _hash_droplock(rel, 0, HASH_SHARE);

    /* Update scan opaque state to show we have lock on the bucket */
    so->hashso_bucket = bucket;
    so->hashso_bucket_valid = true;
    so->hashso_bucket_blkno = blkno;

    /* Fetch the primary bucket page for the bucket */
    buf = _hash_getbuf(rel, blkno, HASH_READ);
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
    page = BufferGetPage(buf);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);
    Assert(opaque->hasho_bucket == bucket);

    /* If a backwards scan is requested, move to the end of the chain */
    if (ScanDirectionIsBackward(dir))
    {
        while (BlockNumberIsValid(opaque->hasho_nextblkno))
            _hash_readnext(rel, &buf, &page, &opaque);
    }

    /* Now find the first tuple satisfying the qualification */
    if (!_hash_step(scan, &buf, dir))
        return false;

    /* if we're here, _hash_step found a valid tuple */
    offnum = ItemPointerGetOffsetNumber(current);
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);
    itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
    scan->xs_ctup.t_self = itup->t_tid;

    return true;
}
/*
 * gistgettuple() -- Get the next tuple in the scan
 */
Datum
gistgettuple(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
    GISTScanOpaque so = (GISTScanOpaque) scan->opaque;

    if (dir != ForwardScanDirection)
        elog(ERROR, "GiST only supports forward scan direction");

    if (!so->qual_ok)
        PG_RETURN_BOOL(false);

    if (so->firstCall)
    {
        /* Begin the scan by processing the root page */
        GISTSearchItem fakeItem;

        pgstat_count_index_scan(scan->indexRelation);

        so->firstCall = false;
        so->curTreeItem = NULL;
        so->curPageData = so->nPageData = 0;

        fakeItem.blkno = GIST_ROOT_BLKNO;
        memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
        gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
    }

    if (scan->numberOfOrderBys > 0)
    {
        /* Must fetch tuples in strict distance order */
        PG_RETURN_BOOL(getNextNearest(scan));
    }
    else
    {
        /* Fetch tuples index-page-at-a-time */
        for (;;)
        {
            if (so->curPageData < so->nPageData)
            {
                /* continuing to return tuples from a leaf page */
                scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
                scan->xs_recheck = so->pageData[so->curPageData].recheck;
                so->curPageData++;
                PG_RETURN_BOOL(true);
            }

            /* find and process the next index page */
            do
            {
                GISTSearchItem *item = getNextGISTSearchItem(so);

                if (!item)
                    PG_RETURN_BOOL(false);

                CHECK_FOR_INTERRUPTS();

                /*
                 * While scanning a leaf page, ItemPointers of matching heap
                 * tuples are stored in so->pageData.  If there are any on
                 * this page, we fall out of the inner "do" and loop around to
                 * return them.
                 */
                gistScanPage(scan, item, so->curTreeItem->distances,
                             NULL, NULL);

                pfree(item);
            } while (so->nPageData == 0);
        }
    }
}
/*
 *	_hash_first() -- Find the first item in a scan.
 *
 *		Find the first item in the index that
 *		satisfies the qualification associated with the scan descriptor.  On
 *		success, the page containing the current index tuple is read locked
 *		and pinned, and the scan's opaque data entry is updated to
 *		include the buffer.
 */
bool
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    ScanKey     cur;
    uint32      hashkey;
    Bucket      bucket;
    BlockNumber blkno;
    Buffer      buf;
    Buffer      metabuf;
    Page        page;
    HashPageOpaque opaque;
    HashMetaPage metap;
    IndexTuple  itup;
    ItemPointer current;
    OffsetNumber offnum;

    pgstat_count_index_scan(rel);

    current = &(so->hashso_curpos);
    ItemPointerSetInvalid(current);

    /*
     * We do not support hash scans with no index qualification, because we
     * would have to read the whole index rather than just one bucket.  That
     * creates a whole raft of problems, since we haven't got a practical way
     * to lock all the buckets against splits or compactions.
     */
    if (scan->numberOfKeys < 1)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("hash indexes do not support whole-index scans")));

    /* There may be more than one index qual, but we hash only the first */
    cur = &scan->keyData[0];

    /* We support only single-column hash indexes */
    Assert(cur->sk_attno == 1);
    /* And there's only one operator strategy, too */
    Assert(cur->sk_strategy == HTEqualStrategyNumber);

    /*
     * If the constant in the index qual is NULL, assume it cannot match any
     * items in the index.
     */
    if (cur->sk_flags & SK_ISNULL)
        return false;

    /*
     * Okay to compute the hash key.  We want to do this before acquiring any
     * locks, in case a user-defined hash function happens to be slow.
     *
     * If scankey operator is not a cross-type comparison, we can use the
     * cached hash function; otherwise gotta look it up in the catalogs.
     *
     * We support the convention that sk_subtype == InvalidOid means the
     * opclass input type; this is a hack to simplify life for ScanKeyInit().
     */
    if (cur->sk_subtype == rel->rd_opcintype[0] ||
        cur->sk_subtype == InvalidOid)
        hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
    else
        hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
                                           cur->sk_subtype);

    so->hashso_sk_hash = hashkey;

    /*
     * Acquire shared split lock so we can compute the target bucket safely
     * (see README).
     */
    _hash_getlock(rel, 0, HASH_SHARE);

    /* Read the metapage */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    /*
     * Compute the target bucket number, and convert to block number.
     */
    bucket = _hash_hashkey2bucket(hashkey,
                                  metap->hashm_maxbucket,
                                  metap->hashm_highmask,
                                  metap->hashm_lowmask);
    blkno = BUCKET_TO_BLKNO(metap, bucket);

    /* done with the metapage */
    _hash_relbuf(rel, metabuf);

    /*
     * Acquire share lock on target bucket; then we can release split lock.
     */
    _hash_getlock(rel, blkno, HASH_SHARE);
    _hash_droplock(rel, 0, HASH_SHARE);

    /* Update scan opaque state to show we have lock on the bucket */
    so->hashso_bucket = bucket;
    so->hashso_bucket_valid = true;
    so->hashso_bucket_blkno = blkno;

    /* Fetch the primary bucket page for the bucket */
    buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
    page = BufferGetPage(buf);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);
    Assert(opaque->hasho_bucket == bucket);

    /* If a backwards scan is requested, move to the end of the chain */
    if (ScanDirectionIsBackward(dir))
    {
        while (BlockNumberIsValid(opaque->hasho_nextblkno))
            _hash_readnext(rel, &buf, &page, &opaque);
    }

    /* Now find the first tuple satisfying the qualification */
    if (!_hash_step(scan, &buf, dir))
        return false;

    /* if we're here, _hash_step found a valid tuple */
    offnum = ItemPointerGetOffsetNumber(current);
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);
    itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
    so->hashso_heappos = itup->t_tid;

    return true;
}
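/*
 * The bucket mapping used above deserves spelling out.  A sketch of what
 * _hash_hashkey2bucket() does (per hashutil.c of this era): mask the hash
 * code with highmask, and fall back to lowmask when the result is a bucket
 * that has not been split into existence yet.  The function name here is
 * illustrative.
 */
static Bucket
hashkey2bucket_sketch(uint32 hashkey, uint32 maxbucket,
                      uint32 highmask, uint32 lowmask)
{
    Bucket      bucket;

    bucket = hashkey & highmask;
    if (bucket > maxbucket)
        bucket = bucket & lowmask;

    return bucket;
}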
/*
 * gistgettuple() -- Get the next tuple in the scan
 */
bool
gistgettuple(IndexScanDesc scan, ScanDirection dir)
{
    GISTScanOpaque so = (GISTScanOpaque) scan->opaque;

    if (dir != ForwardScanDirection)
        elog(ERROR, "GiST only supports forward scan direction");

    if (!so->qual_ok)
        return false;

    if (so->firstCall)
    {
        /* Begin the scan by processing the root page */
        GISTSearchItem fakeItem;

        pgstat_count_index_scan(scan->indexRelation);

        so->firstCall = false;
        so->curPageData = so->nPageData = 0;
        scan->xs_hitup = NULL;
        if (so->pageDataCxt)
            MemoryContextReset(so->pageDataCxt);

        fakeItem.blkno = GIST_ROOT_BLKNO;
        memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
        gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
    }

    if (scan->numberOfOrderBys > 0)
    {
        /* Must fetch tuples in strict distance order */
        return getNextNearest(scan);
    }
    else
    {
        /* Fetch tuples index-page-at-a-time */
        for (;;)
        {
            if (so->curPageData < so->nPageData)
            {
                if (scan->kill_prior_tuple && so->curPageData > 0)
                {
                    if (so->killedItems == NULL)
                    {
                        MemoryContext oldCxt =
                            MemoryContextSwitchTo(so->giststate->scanCxt);

                        so->killedItems =
                            (OffsetNumber *) palloc(MaxIndexTuplesPerPage *
                                                    sizeof(OffsetNumber));

                        MemoryContextSwitchTo(oldCxt);
                    }
                    if (so->numKilled < MaxIndexTuplesPerPage)
                        so->killedItems[so->numKilled++] =
                            so->pageData[so->curPageData - 1].offnum;
                }
                /* continuing to return tuples from a leaf page */
                scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
                scan->xs_recheck = so->pageData[so->curPageData].recheck;

                /* in an index-only scan, also return the reconstructed tuple */
                if (scan->xs_want_itup)
                    scan->xs_hitup = so->pageData[so->curPageData].recontup;

                so->curPageData++;

                return true;
            }

            /*
             * Check the last returned tuple and add it to killitems if
             * necessary
             */
            if (scan->kill_prior_tuple
                && so->curPageData > 0
                && so->curPageData == so->nPageData)
            {
                if (so->killedItems == NULL)
                {
                    MemoryContext oldCxt =
                        MemoryContextSwitchTo(so->giststate->scanCxt);

                    so->killedItems =
                        (OffsetNumber *) palloc(MaxIndexTuplesPerPage *
                                                sizeof(OffsetNumber));

                    MemoryContextSwitchTo(oldCxt);
                }
                if (so->numKilled < MaxIndexTuplesPerPage)
                    so->killedItems[so->numKilled++] =
                        so->pageData[so->curPageData - 1].offnum;
            }

            /* find and process the next index page */
            do
            {
                GISTSearchItem *item;

                if ((so->curBlkno != InvalidBlockNumber) && (so->numKilled > 0))
                    gistkillitems(scan);

                item = getNextGISTSearchItem(so);

                if (!item)
                    return false;

                CHECK_FOR_INTERRUPTS();

                /* save current item BlockNumber for next gistkillitems() call */
                so->curBlkno = item->blkno;

                /*
                 * While scanning a leaf page, ItemPointers of matching heap
                 * tuples are stored in so->pageData.  If there are any on
                 * this page, we fall out of the inner "do" and loop around to
                 * return them.
                 */
                gistScanPage(scan, item, item->distances, NULL, NULL);

                pfree(item);
            } while (so->nPageData == 0);
        }
    }
}
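/*
 * Caller-side sketch of how the amgettuple interface above is typically
 * driven, assuming an already-prepared scan key "skey" and open relations;
 * the function and variable names are illustrative.  Each returned TID
 * still needs a heap visibility check.
 */
static void
drive_index_scan_sketch(Relation heapRel, Relation indexRel,
                        Snapshot snapshot, ScanKey skey)
{
    IndexScanDesc scan;
    ItemPointer tid;

    scan = index_beginscan(heapRel, indexRel, snapshot, 1, 0);
    index_rescan(scan, skey, 1, NULL, 0);

    while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
    {
        /* process *tid here; fetch the heap tuple and test visibility */
    }

    index_endscan(scan);
}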
void
ginNewScanKey(IndexScanDesc scan)
{
    ScanKey     scankey = scan->keyData;
    GinScanOpaque so = (GinScanOpaque) scan->opaque;
    int         i;
    bool        hasNullQuery = false;
    MemoryContext oldCtx;

    /*
     * Allocate all the scan key information in the key context.  (If
     * extractQuery leaks anything there, it won't be reset until the end of
     * scan or rescan, but that's OK.)
     */
    oldCtx = MemoryContextSwitchTo(so->keyCtx);

    /* if no scan keys provided, allocate extra EVERYTHING GinScanKey */
    so->keys = (GinScanKey)
        palloc(Max(scan->numberOfKeys, 1) * sizeof(GinScanKeyData));
    so->nkeys = 0;

    /* initialize expansible array of GinScanEntry pointers */
    so->totalentries = 0;
    so->allocentries = 32;
    so->entries = (GinScanEntry *)
        palloc(so->allocentries * sizeof(GinScanEntry));

    so->isVoidRes = false;

    for (i = 0; i < scan->numberOfKeys; i++)
    {
        ScanKey     skey = &scankey[i];
        Datum      *queryValues;
        int32       nQueryValues = 0;
        bool       *partial_matches = NULL;
        Pointer    *extra_data = NULL;
        bool       *nullFlags = NULL;
        int32       searchMode = GIN_SEARCH_MODE_DEFAULT;

        /*
         * We assume that GIN-indexable operators are strict, so a null query
         * argument means an unsatisfiable query.
         */
        if (skey->sk_flags & SK_ISNULL)
        {
            so->isVoidRes = true;
            break;
        }

        /* OK to call the extractQueryFn */
        queryValues = (Datum *)
            DatumGetPointer(FunctionCall7Coll(&so->ginstate.extractQueryFn[skey->sk_attno - 1],
                                              so->ginstate.supportCollation[skey->sk_attno - 1],
                                              skey->sk_argument,
                                              PointerGetDatum(&nQueryValues),
                                              UInt16GetDatum(skey->sk_strategy),
                                              PointerGetDatum(&partial_matches),
                                              PointerGetDatum(&extra_data),
                                              PointerGetDatum(&nullFlags),
                                              PointerGetDatum(&searchMode)));

        /*
         * If bogus searchMode is returned, treat as GIN_SEARCH_MODE_ALL; note
         * in particular we don't allow extractQueryFn to select
         * GIN_SEARCH_MODE_EVERYTHING.
         */
        if (searchMode < GIN_SEARCH_MODE_DEFAULT ||
            searchMode > GIN_SEARCH_MODE_ALL)
            searchMode = GIN_SEARCH_MODE_ALL;

        /* Non-default modes require the index to have placeholders */
        if (searchMode != GIN_SEARCH_MODE_DEFAULT)
            hasNullQuery = true;

        /*
         * In default mode, no keys means an unsatisfiable query.
         */
        if (queryValues == NULL || nQueryValues <= 0)
        {
            if (searchMode == GIN_SEARCH_MODE_DEFAULT)
            {
                so->isVoidRes = true;
                break;
            }
            nQueryValues = 0;   /* ensure sane value */
        }

        /*
         * If the extractQueryFn didn't create a nullFlags array, create one,
         * assuming that everything's non-null.  Otherwise, run through the
         * array and make sure each value is exactly 0 or 1; this ensures
         * binary compatibility with the GinNullCategory representation.
         * While at it, detect whether any null keys are present.
         */
        if (nullFlags == NULL)
            nullFlags = (bool *) palloc0(nQueryValues * sizeof(bool));
        else
        {
            int32       j;

            for (j = 0; j < nQueryValues; j++)
            {
                if (nullFlags[j])
                {
                    nullFlags[j] = true;    /* not any other nonzero value */
                    hasNullQuery = true;
                }
            }
        }
        /* now we can use the nullFlags as category codes */

        ginFillScanKey(so, skey->sk_attno,
                       skey->sk_strategy, searchMode,
                       skey->sk_argument, nQueryValues,
                       queryValues, (GinNullCategory *) nullFlags,
                       partial_matches, extra_data);
    }

    /*
     * If there are no regular scan keys, generate an EVERYTHING scankey to
     * drive a full-index scan.
     */
    if (so->nkeys == 0 && !so->isVoidRes)
    {
        hasNullQuery = true;
        ginFillScanKey(so, FirstOffsetNumber,
                       InvalidStrategy, GIN_SEARCH_MODE_EVERYTHING,
                       (Datum) 0, 0,
                       NULL, NULL, NULL, NULL);
    }

    /*
     * If the index is version 0, it may be missing null and placeholder
     * entries, which would render searches for nulls and full-index scans
     * unreliable.  Throw an error if so.
     */
    if (hasNullQuery && !so->isVoidRes)
    {
        GinStatsData ginStats;

        ginGetStats(scan->indexRelation, &ginStats);
        if (ginStats.ginVersion < 1)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("old GIN indexes do not support whole-index scans nor searches for nulls"),
                     errhint("To fix this, do REINDEX INDEX \"%s\".",
                             RelationGetRelationName(scan->indexRelation))));
    }

    MemoryContextSwitchTo(oldCtx);

    pgstat_count_index_scan(scan->indexRelation);
}
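/*
 * A minimal sketch of the seven-argument extractQuery form that
 * ginNewScanKey() calls through FunctionCall7Coll() above; the argument
 * order is taken from that call site, and the decomposition helper is
 * hypothetical.  Beyond the entry array, this form can report partial-match
 * flags, per-entry null flags, opaque extra data, and a search mode such as
 * GIN_SEARCH_MODE_ALL.
 */
Datum
my_extract_query_v1(PG_FUNCTION_ARGS)
{
    Datum       query = PG_GETARG_DATUM(0);
    int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
    /* arg 2: strategy; args 3-5: pmatch, extra_data, nullFlags (unused here) */
    int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
    Datum      *entries;

    /* hypothetical helper: decompose the query into indexable entries */
    entries = my_split_query_into_entries(query, nentries);

    /* with no extractable entries, request a full-index scan instead */
    if (*nentries == 0)
        *searchMode = GIN_SEARCH_MODE_ALL;

    PG_RETURN_POINTER(entries);
}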
/*
 * Execute the index scan.
 *
 * This works by reading index TIDs from the revmap, and obtaining the index
 * tuples pointed to by them; the summary values in the index tuples are
 * compared to the scan keys.  We return into the TID bitmap all the pages in
 * ranges corresponding to index tuples that match the scan keys.
 *
 * If a TID from the revmap is read as InvalidTID, we know that range is
 * unsummarized.  Pages in those ranges need to be returned regardless of scan
 * keys.
 */
int64
bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
    Relation    idxRel = scan->indexRelation;
    Buffer      buf = InvalidBuffer;
    BrinDesc   *bdesc;
    Oid         heapOid;
    Relation    heapRel;
    BrinOpaque *opaque;
    BlockNumber nblocks;
    BlockNumber heapBlk;
    int         totalpages = 0;
    FmgrInfo   *consistentFn;
    MemoryContext oldcxt;
    MemoryContext perRangeCxt;

    opaque = (BrinOpaque *) scan->opaque;
    bdesc = opaque->bo_bdesc;
    pgstat_count_index_scan(idxRel);

    /*
     * We need to know the size of the table so that we know how long to
     * iterate on the revmap.
     */
    heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
    heapRel = heap_open(heapOid, AccessShareLock);
    nblocks = RelationGetNumberOfBlocks(heapRel);
    heap_close(heapRel, AccessShareLock);

    /*
     * Make room for the consistent support procedures of indexed columns.  We
     * don't look them up here; we do that lazily the first time we see a scan
     * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
     */
    consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);

    /*
     * Setup and use a per-range memory context, which is reset every time we
     * loop below.  This avoids having to free the tuples within the loop.
     */
    perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
                                        "bringetbitmap cxt",
                                        ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(perRangeCxt);

    /*
     * Now scan the revmap.  We start by querying for heap page 0,
     * incrementing by the number of pages per range; this gives us a full
     * view of the table.
     */
    for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
    {
        bool        addrange;
        BrinTuple  *tup;
        OffsetNumber off;
        Size        size;

        CHECK_FOR_INTERRUPTS();

        MemoryContextResetAndDeleteChildren(perRangeCxt);

        tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
                                       &off, &size, BUFFER_LOCK_SHARE,
                                       scan->xs_snapshot);
        if (tup)
        {
            tup = brin_copy_tuple(tup, size);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        /*
         * For page ranges with no indexed tuple, we must return the whole
         * range; otherwise, compare it to the scan keys.
         */
        if (tup == NULL)
        {
            addrange = true;
        }
        else
        {
            BrinMemTuple *dtup;

            dtup = brin_deform_tuple(bdesc, tup);
            if (dtup->bt_placeholder)
            {
                /*
                 * Placeholder tuples are always returned, regardless of the
                 * values stored in them.
                 */
                addrange = true;
            }
            else
            {
                int         keyno;

                /*
                 * Compare scan keys with summary values stored for the range.
                 * If scan keys are matched, the page range must be added to
                 * the bitmap.  We initially assume the range needs to be
                 * added; in particular this serves the case where there are
                 * no keys.
                 */
                addrange = true;
                for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
                {
                    ScanKey     key = &scan->keyData[keyno];
                    AttrNumber  keyattno = key->sk_attno;
                    BrinValues *bval = &dtup->bt_columns[keyattno - 1];
                    Datum       add;

                    /*
                     * The collation of the scan key must match the collation
                     * used in the index column (but only if the search is not
                     * IS NULL/ IS NOT NULL).  Otherwise we shouldn't be using
                     * this index ...
                     */
                    Assert((key->sk_flags & SK_ISNULL) ||
                           (key->sk_collation ==
                            bdesc->bd_tupdesc->attrs[keyattno - 1]->attcollation));

                    /* First time this column? look up consistent function */
                    if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
                    {
                        FmgrInfo   *tmp;

                        tmp = index_getprocinfo(idxRel, keyattno,
                                                BRIN_PROCNUM_CONSISTENT);
                        fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
                                       CurrentMemoryContext);
                    }

                    /*
                     * Check whether the scan key is consistent with the page
                     * range values; if so, have the pages in the range added
                     * to the output bitmap.
                     *
                     * When there are multiple scan keys, failure to meet the
                     * criteria for a single one of them is enough to discard
                     * the range as a whole, so break out of the loop as soon
                     * as a false return value is obtained.
                     */
                    add = FunctionCall3Coll(&consistentFn[keyattno - 1],
                                            key->sk_collation,
                                            PointerGetDatum(bdesc),
                                            PointerGetDatum(bval),
                                            PointerGetDatum(key));
                    addrange = DatumGetBool(add);
                    if (!addrange)
                        break;
                }
            }
        }

        /* add the pages in the range to the output bitmap, if needed */
        if (addrange)
        {
            BlockNumber pageno;

            for (pageno = heapBlk;
                 pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
                 pageno++)
            {
                MemoryContextSwitchTo(oldcxt);
                tbm_add_page(tbm, pageno);
                totalpages++;
                MemoryContextSwitchTo(perRangeCxt);
            }
        }
    }

    MemoryContextSwitchTo(oldcxt);
    MemoryContextDelete(perRangeCxt);

    if (buf != InvalidBuffer)
        ReleaseBuffer(buf);

    /*
     * XXX We have an approximation of the number of *pages* that our scan
     * returns, but we don't have a precise idea of the number of heap tuples
     * involved.
     */
    return totalpages * 10;
}
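/*
 * A minimal sketch of a BRIN consistent support procedure matching the
 * FunctionCall3Coll() call site above, in the style of a minmax opclass:
 * bt_values[0]/bt_values[1] are assumed to hold the range's min/max summary,
 * and the comparison helper is hypothetical.
 */
Datum
my_brin_consistent(PG_FUNCTION_ARGS)
{
    BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
    BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
    ScanKey     key = (ScanKey) PG_GETARG_POINTER(2);
    bool        matches;

    /* an all-nulls range cannot match an ordinary (non-NULL-test) key */
    if (column->bt_allnulls)
        PG_RETURN_BOOL(false);

    /* hypothetical helper: test the key against the stored min/max summary */
    matches = my_range_may_contain(bdesc, key,
                                   column->bt_values[0],    /* minimum */
                                   column->bt_values[1]);   /* maximum */

    PG_RETURN_BOOL(matches);
}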
static bool
rtnext(IndexScanDesc s, ScanDirection dir)
{
    Page        p;
    OffsetNumber n;
    RTreePageOpaque po;
    RTreeScanOpaque so;

    so = (RTreeScanOpaque) s->opaque;

    if (!ItemPointerIsValid(&(s->currentItemData)))
    {
        /* first call: start at the root */
        Assert(BufferIsValid(so->curbuf) == false);
        so->curbuf = ReadBuffer(s->indexRelation, P_ROOT);
        pgstat_count_index_scan(&s->xs_pgstat_info);
    }

    p = BufferGetPage(so->curbuf);
    po = (RTreePageOpaque) PageGetSpecialPointer(p);

    if (!ItemPointerIsValid(&(s->currentItemData)))
    {
        /* first call: start at first/last offset */
        if (ScanDirectionIsForward(dir))
            n = FirstOffsetNumber;
        else
            n = PageGetMaxOffsetNumber(p);
    }
    else
    {
        /* go on to the next offset */
        n = ItemPointerGetOffsetNumber(&(s->currentItemData));
        if (ScanDirectionIsForward(dir))
            n = OffsetNumberNext(n);
        else
            n = OffsetNumberPrev(n);
    }

    for (;;)
    {
        IndexTuple  it;
        RTSTACK    *stk;

        n = findnext(s, n, dir);

        /* no match on this page, so read in the next stack entry */
        if (n == InvalidOffsetNumber)
        {
            /* if out of stack entries, we're done */
            if (so->s_stack == NULL)
            {
                ReleaseBuffer(so->curbuf);
                so->curbuf = InvalidBuffer;
                return false;
            }

            stk = so->s_stack;
            so->curbuf = ReleaseAndReadBuffer(so->curbuf, s->indexRelation,
                                              stk->rts_blk);
            p = BufferGetPage(so->curbuf);
            po = (RTreePageOpaque) PageGetSpecialPointer(p);

            if (ScanDirectionIsBackward(dir))
                n = OffsetNumberPrev(stk->rts_child);
            else
                n = OffsetNumberNext(stk->rts_child);
            so->s_stack = stk->rts_parent;
            pfree(stk);

            continue;
        }

        if (po->flags & F_LEAF)
        {
            ItemPointerSet(&(s->currentItemData),
                           BufferGetBlockNumber(so->curbuf),
                           n);
            it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
            s->xs_ctup.t_self = it->t_tid;
            return true;
        }
        else
        {
            BlockNumber blk;

            stk = (RTSTACK *) palloc(sizeof(RTSTACK));
            stk->rts_child = n;
            stk->rts_blk = BufferGetBlockNumber(so->curbuf);
            stk->rts_parent = so->s_stack;
            so->s_stack = stk;

            it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
            blk = ItemPointerGetBlockNumber(&(it->t_tid));

            /*
             * Note that we release the pin on the page as we descend down the
             * tree, even though there's a good chance we'll eventually need
             * to re-read the buffer later in this scan.  This may or may not
             * be optimal, but it doesn't seem likely to make a huge
             * performance difference either way.
             */
            so->curbuf = ReleaseAndReadBuffer(so->curbuf, s->indexRelation,
                                              blk);
            p = BufferGetPage(so->curbuf);
            po = (RTreePageOpaque) PageGetSpecialPointer(p);

            if (ScanDirectionIsBackward(dir))
                n = PageGetMaxOffsetNumber(p);
            else
                n = FirstOffsetNumber;
        }
    }
}
/*
 * Fetch tuples that match the search key; this can be invoked either to
 * fetch the first such tuple or subsequent matching tuples.  Returns the
 * number of matching tuples placed in tids[].
 */
static int
gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids,
         int maxtids, bool ignore_killed_tuples)
{
    MIRROREDLOCK_BUFMGR_DECLARE;

    Page        p;
    OffsetNumber n;
    GISTScanOpaque so;
    GISTSearchStack *stk;
    IndexTuple  it;
    GISTPageOpaque opaque;
    int         ntids = 0;

    so = (GISTScanOpaque) scan->opaque;

    /* test qual_ok before taking the lock, so the early exit can't leak it */
    if (so->qual_ok == false)
        return 0;

    // -------- MirroredLock ----------
    MIRROREDLOCK_BUFMGR_LOCK;

    if (ItemPointerIsValid(&so->curpos) == false)
    {
        /* Being asked to fetch the first entry, so start at the root */
        Assert(so->curbuf == InvalidBuffer);
        Assert(so->stack == NULL);

        so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);

        stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack));

        stk->next = NULL;
        stk->block = GIST_ROOT_BLKNO;

        pgstat_count_index_scan(scan->indexRelation);
    }
    else if (so->curbuf == InvalidBuffer)
    {
        MIRROREDLOCK_BUFMGR_UNLOCK;
        // -------- MirroredLock ----------

        return 0;
    }

    /*
     * check stored pointers from last visit
     */
    if (so->nPageData > 0)
    {
        while (ntids < maxtids && so->curPageData < so->nPageData)
        {
            tids[ntids] = scan->xs_ctup.t_self =
                so->pageData[so->curPageData].heapPtr;
            ItemPointerSet(&(so->curpos),
                           BufferGetBlockNumber(so->curbuf),
                           so->pageData[so->curPageData].pageOffset);

            so->curPageData++;
            ntids++;
        }

        if (ntids == maxtids)
        {
            MIRROREDLOCK_BUFMGR_UNLOCK;
            // -------- MirroredLock ----------

            return ntids;
        }

        /*
         * Go to the next page
         */
        stk = so->stack->next;
        pfree(so->stack);
        so->stack = stk;

        /* If we're out of stack entries, we're done */
        if (so->stack == NULL)
        {
            ReleaseBuffer(so->curbuf);
            so->curbuf = InvalidBuffer;

            MIRROREDLOCK_BUFMGR_UNLOCK;
            // -------- MirroredLock ----------

            return ntids;
        }

        so->curbuf = ReleaseAndReadBuffer(so->curbuf,
                                          scan->indexRelation,
                                          stk->block);
    }

    for (;;)
    {
        /* First of all, we need to lock the buffer */
        Assert(so->curbuf != InvalidBuffer);
        LockBuffer(so->curbuf, GIST_SHARE);
        gistcheckpage(scan->indexRelation, so->curbuf);
        p = BufferGetPage(so->curbuf);
        opaque = GistPageGetOpaque(p);

        /* remember lsn to identify page changed for tuple's killing */
        so->stack->lsn = PageGetLSN(p);

        /* check page split, occurred since last visit or visit to parent */
        if (!XLogRecPtrIsInvalid(so->stack->parentlsn) &&
            XLByteLT(so->stack->parentlsn, opaque->nsn) &&
            opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
            (so->stack->next == NULL ||
             so->stack->next->block != opaque->rightlink))  /* not already added */
        {
            /* detect page split, follow right link to add pages */
            stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
            stk->next = so->stack->next;
            stk->block = opaque->rightlink;
            stk->parentlsn = so->stack->parentlsn;
            memset(&(stk->lsn), 0, sizeof(GistNSN));
            so->stack->next = stk;
        }

        /* if page is empty, then just skip it */
        if (PageIsEmpty(p))
        {
            LockBuffer(so->curbuf, GIST_UNLOCK);
            stk = so->stack->next;
            pfree(so->stack);
            so->stack = stk;

            if (so->stack == NULL)
            {
                ReleaseBuffer(so->curbuf);
                so->curbuf = InvalidBuffer;

                MIRROREDLOCK_BUFMGR_UNLOCK;
                // -------- MirroredLock ----------

                return ntids;
            }

            so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
                                              stk->block);
            continue;
        }

        if (ScanDirectionIsBackward(dir))
            n = PageGetMaxOffsetNumber(p);
        else
            n = FirstOffsetNumber;

        /* wonderful, we can look at page */
        so->nPageData = so->curPageData = 0;

        for (;;)
        {
            n = gistfindnext(scan, n, dir);

            if (!OffsetNumberIsValid(n))
            {
                while (ntids < maxtids && so->curPageData < so->nPageData)
                {
                    tids[ntids] = scan->xs_ctup.t_self =
                        so->pageData[so->curPageData].heapPtr;
                    ItemPointerSet(&(so->curpos),
                                   BufferGetBlockNumber(so->curbuf),
                                   so->pageData[so->curPageData].pageOffset);

                    so->curPageData++;
                    ntids++;
                }

                if (ntids == maxtids)
                {
                    LockBuffer(so->curbuf, GIST_UNLOCK);

                    MIRROREDLOCK_BUFMGR_UNLOCK;
                    // -------- MirroredLock ----------

                    return ntids;
                }

                /*
                 * We ran out of matching index entries on the current page,
                 * so pop the top stack entry and use it to continue the
                 * search.
                 */
                LockBuffer(so->curbuf, GIST_UNLOCK);
                stk = so->stack->next;
                pfree(so->stack);
                so->stack = stk;

                /* If we're out of stack entries, we're done */
                if (so->stack == NULL)
                {
                    ReleaseBuffer(so->curbuf);
                    so->curbuf = InvalidBuffer;

                    MIRROREDLOCK_BUFMGR_UNLOCK;
                    // -------- MirroredLock ----------

                    return ntids;
                }

                so->curbuf = ReleaseAndReadBuffer(so->curbuf,
                                                  scan->indexRelation,
                                                  stk->block);
                /* XXX go up */
                break;
            }

            if (GistPageIsLeaf(p))
            {
                /*
                 * We've found a matching index entry in a leaf page, so
                 * return success.  Note that we keep "curbuf" pinned so that
                 * we can efficiently resume the index scan later.
                 */
                if (!(ignore_killed_tuples &&
                      ItemIdIsDead(PageGetItemId(p, n))))
                {
                    it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
                    so->pageData[so->nPageData].heapPtr = it->t_tid;
                    so->pageData[so->nPageData].pageOffset = n;
                    so->nPageData++;
                }
            }
            else
            {
                /*
                 * We've found an entry in an internal node whose key is
                 * consistent with the search key, so push it to stack
                 */
                stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));

                it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
                stk->block = ItemPointerGetBlockNumber(&(it->t_tid));
                memset(&(stk->lsn), 0, sizeof(GistNSN));
                stk->parentlsn = so->stack->lsn;

                stk->next = so->stack->next;
                so->stack->next = stk;
            }

            if (ScanDirectionIsBackward(dir))
                n = OffsetNumberPrev(n);
            else
                n = OffsetNumberNext(n);
        }
    }

    MIRROREDLOCK_BUFMGR_UNLOCK;
    // -------- MirroredLock ----------

    return ntids;
}
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * The result is the next heap tuple satisfying the scan keys and the
 * snapshot, or NULL if no more matching tuples exist.  On success,
 * the buffer containing the heap tuple is pinned (the pin will be dropped
 * at the next index_getnext or index_endscan).  The index TID corresponding
 * to the heap tuple can be obtained if needed from scan->currentItemData.
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
    HeapTuple   heapTuple = &scan->xs_ctup;

    SCAN_CHECKS;

    /* Release any previously held pin */
    if (BufferIsValid(scan->xs_cbuf))
    {
        ReleaseBuffer(scan->xs_cbuf);
        scan->xs_cbuf = InvalidBuffer;
    }

    /*
     * If we already got a tuple and it must be unique, there's no need to
     * make the index AM look through any additional tuples.  (This can
     * save a useful amount of work in scenarios where there are many dead
     * tuples due to heavy update activity.)
     *
     * To do this we must keep track of the logical scan position
     * (before/on/after tuple).  Also, we have to be sure to release scan
     * resources before returning NULL; if we fail to do so then a
     * multi-index scan can easily run the system out of free buffers.  We
     * can release index-level resources fairly cheaply by calling
     * index_rescan.  This means there are two persistent states as far as
     * the index AM is concerned: on-tuple and rescanned.  If we are
     * actually asked to re-fetch the single tuple, we have to go through
     * a fresh indexscan startup, which penalizes that (infrequent) case.
     */
    if (scan->keys_are_unique && scan->got_tuple)
    {
        int         new_tuple_pos = scan->unique_tuple_pos;

        if (ScanDirectionIsForward(direction))
        {
            if (new_tuple_pos <= 0)
                new_tuple_pos++;
        }
        else
        {
            if (new_tuple_pos >= 0)
                new_tuple_pos--;
        }

        if (new_tuple_pos == 0)
        {
            /*
             * We are moving onto the unique tuple from having been off it.
             * We just fall through and let the index AM do the work.  Note
             * we should get the right answer regardless of scan direction.
             */
            scan->unique_tuple_pos = 0; /* need to update position */
        }
        else
        {
            /*
             * Moving off the tuple; must do amrescan to release index-level
             * pins before we return NULL.  Since index_rescan will reset my
             * state, must save and restore...
             */
            int         unique_tuple_mark = scan->unique_tuple_mark;

            index_rescan(scan, NULL /* no change to key */ );

            scan->keys_are_unique = true;
            scan->got_tuple = true;
            scan->unique_tuple_pos = new_tuple_pos;
            scan->unique_tuple_mark = unique_tuple_mark;

            return NULL;
        }
    }

    /* just make sure this is false... */
    scan->kill_prior_tuple = false;

    for (;;)
    {
        bool        found;
        uint16      sv_infomask;

        pgstat_count_index_scan(&scan->xs_pgstat_info);

        /*
         * The AM's gettuple proc finds the next tuple matching the scan
         * keys.  index_beginscan already set up fn_getnext.
         */
        found = DatumGetBool(FunctionCall2(&scan->fn_getnext,
                                           PointerGetDatum(scan),
                                           Int32GetDatum(direction)));

        /* Reset kill flag immediately for safety */
        scan->kill_prior_tuple = false;

        if (!found)
            return NULL;        /* failure exit */

        /*
         * Fetch the heap tuple and see if it matches the snapshot.
         */
        if (heap_fetch(scan->heapRelation, scan->xs_snapshot,
                       heapTuple, &scan->xs_cbuf, true,
                       &scan->xs_pgstat_info))
            break;

        /* Skip if no tuple at this location */
        if (heapTuple->t_data == NULL)
            continue;           /* should we raise an error instead? */

        /*
         * If we can't see it, maybe no one else can either.  Check to see
         * if the tuple is dead to all transactions.  If so, signal the
         * index AM to not return it on future indexscans.
         *
         * We told heap_fetch to keep a pin on the buffer, so we can
         * re-access the tuple here.  But we must re-lock the buffer first.
         * Also, it's just barely possible for an update of hint bits to
         * occur here.
         */
        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
        sv_infomask = heapTuple->t_data->t_infomask;

        if (HeapTupleSatisfiesVacuum(heapTuple->t_data,
                                     RecentGlobalXmin) == HEAPTUPLE_DEAD)
            scan->kill_prior_tuple = true;

        if (sv_infomask != heapTuple->t_data->t_infomask)
            SetBufferCommitInfoNeedsSave(scan->xs_cbuf);

        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
        ReleaseBuffer(scan->xs_cbuf);
        scan->xs_cbuf = InvalidBuffer;
    }

    /* Success exit */
    scan->got_tuple = true;

    /*
     * If we just fetched a known-unique tuple, then subsequent calls will
     * go through the short-circuit code above.  unique_tuple_pos has been
     * initialized to 0, which is the correct state ("on row").
     */

    pgstat_count_index_getnext(&scan->xs_pgstat_info);

    return heapTuple;
}
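/*
 * Caller-side sketch of driving this version of index_getnext(); the scan
 * is assumed to have been set up with index_beginscan(), and the function
 * name is illustrative.  Unlike the TID-level interface, each tuple
 * returned here has already passed the snapshot visibility check.
 */
static void
consume_index_scan_sketch(IndexScanDesc scan)
{
    HeapTuple   tuple;

    while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* process the visible heap tuple here */
    }
}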