/* * get_op_hash_function * Get the OID of the datatype-specific hash function associated with * a hashable equality operator. * * Returns InvalidOid if no hash function can be found. (This indicates * that the operator should not have been marked oprcanhash.) */ Oid get_op_hash_function(Oid opno) { CatCList *catlist; int i; Oid opclass = InvalidOid; /* * Search pg_amop to see if the target operator is registered as the "=" * operator of any hash opclass. If the operator is registered in * multiple opclasses, assume we can use the associated hash function from * any one. */ catlist = SearchSysCacheList(AMOPOPID, 1, ObjectIdGetDatum(opno), 0, 0, 0); for (i = 0; i < catlist->n_members; i++) { HeapTuple tuple = &catlist->members[i]->tuple; Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); if (aform->amopstrategy == HTEqualStrategyNumber && opclass_is_hash(aform->amopclaid)) { opclass = aform->amopclaid; break; } } ReleaseSysCacheList(catlist); if (OidIsValid(opclass)) { /* Found a suitable opclass, get its default hash support function */ return get_opclass_proc(opclass, InvalidOid, HASHPROC); } /* Didn't find a match... */ return InvalidOid; }
/* * _bt_first() -- Find the first item in a scan. * * We need to be clever about the direction of scan, the search * conditions, and the tree ordering. We find the first item (or, * if backwards scan, the last item) in the tree that satisfies the * qualifications in the scan key. On success exit, the page containing * the current index tuple is pinned but not locked, and data about * the matching tuple(s) on the page has been loaded into so->currPos, * and scan->xs_ctup.t_self is set to the heap TID of the current tuple. * * If there are no matching items in the index, we return FALSE, with no * pins or locks held. * * Note that scan->keyData[], and the so->keyData[] scankey built from it, * are both search-type scankeys (see nbtree/README for more about this). * Within this routine, we build a temporary insertion-type scankey to use * in locating the scan start position. */ bool _bt_first(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Buffer buf; BTStack stack; OffsetNumber offnum; StrategyNumber strat; bool nextkey; bool goback; ScanKey startKeys[INDEX_MAX_KEYS]; ScanKeyData scankeys[INDEX_MAX_KEYS]; int keysCount = 0; int i; StrategyNumber strat_total; pgstat_count_index_scan(&scan->xs_pgstat_info); /* * Examine the scan keys and eliminate any redundant keys; also mark the * keys that must be matched to continue the scan. */ _bt_preprocess_keys(scan); /* * Quit now if _bt_preprocess_keys() discovered that the scan keys can * never be satisfied (eg, x == 1 AND x > 2). */ if (!so->qual_ok) return false; /*---------- * Examine the scan keys to discover where we need to start the scan. * * We want to identify the keys that can be used as starting boundaries; * these are =, >, or >= keys for a forward scan or =, <, <= keys for * a backwards scan. We can use keys for multiple attributes so long as * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept * a > or < boundary or find an attribute with no boundary (which can be * thought of as the same as "> -infinity"), we can't use keys for any * attributes to its right, because it would break our simplistic notion * of what initial positioning strategy to use. * * When the scan keys include non-default operators, _bt_preprocess_keys * may not be able to eliminate redundant keys; in such cases we will * arbitrarily pick a usable one for each attribute. This is correct * but possibly not optimal behavior. (For example, with keys like * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when * x=5 would be more efficient.) Since the situation only arises in * hokily-worded queries, live with it. * * When both equality and inequality keys appear for a single attribute * (again, only possible when non-default operators appear), we *must* * select one of the equality keys for the starting point, because * _bt_checkkeys() will stop the scan as soon as an equality qual fails. * For example, if we have keys like "x >= 4 AND x = 10" and we elect to * start at x=4, we will fail and stop before reaching x=10. If multiple * equality quals survive preprocessing, however, it doesn't matter which * one we use --- by definition, they are either redundant or * contradictory. * * In this loop, row-comparison keys are treated the same as keys on their * first (leftmost) columns. We'll add on lower-order columns of the row * comparison below, if possible. * * The selected scan keys (at most one per index column) are remembered by * storing their addresses into the local startKeys[] array. *---------- */ strat_total = BTEqualStrategyNumber; if (so->numberOfKeys > 0) { AttrNumber curattr; ScanKey chosen; ScanKey cur; /* * chosen is the so-far-chosen key for the current attribute, if any. * We don't cast the decision in stone until we reach keys for the * next attribute. */ curattr = 1; chosen = NULL; /* * Loop iterates from 0 to numberOfKeys inclusive; we use the last * pass to handle after-last-key processing. Actual exit from the * loop is at one of the "break" statements below. */ for (cur = so->keyData, i = 0;; cur++, i++) { if (i >= so->numberOfKeys || cur->sk_attno != curattr) { /* * Done looking at keys for curattr. If we didn't find a * usable boundary key, quit; else save the boundary key * pointer in startKeys. */ if (chosen == NULL) break; startKeys[keysCount++] = chosen; /* * Adjust strat_total, and quit if we have stored a > or < * key. */ strat = chosen->sk_strategy; if (strat != BTEqualStrategyNumber) { strat_total = strat; if (strat == BTGreaterStrategyNumber || strat == BTLessStrategyNumber) break; } /* * Done if that was the last attribute, or if next key is not * in sequence (implying no boundary key is available for the * next attribute). */ if (i >= so->numberOfKeys || cur->sk_attno != curattr + 1) break; /* * Reset for next attr. */ curattr = cur->sk_attno; chosen = NULL; } /* Can we use this key as a starting boundary for this attr? */ switch (cur->sk_strategy) { case BTLessStrategyNumber: case BTLessEqualStrategyNumber: if (chosen == NULL && ScanDirectionIsBackward(dir)) chosen = cur; break; case BTEqualStrategyNumber: /* override any non-equality choice */ chosen = cur; break; case BTGreaterEqualStrategyNumber: case BTGreaterStrategyNumber: if (chosen == NULL && ScanDirectionIsForward(dir)) chosen = cur; break; } } } /* * If we found no usable boundary keys, we have to start from one end of * the tree. Walk down that edge to the first or last key, and scan from * there. */ if (keysCount == 0) return _bt_endpoint(scan, dir); /* * We want to start the scan somewhere within the index. Set up an * insertion scankey we can use to search for the boundary point we * identified above. The insertion scankey is built in the local * scankeys[] array, using the keys identified by startKeys[]. */ Assert(keysCount <= INDEX_MAX_KEYS); for (i = 0; i < keysCount; i++) { ScanKey cur = startKeys[i]; Assert(cur->sk_attno == i + 1); if (cur->sk_flags & SK_ROW_HEADER) { /* * Row comparison header: look to the first row member instead. * * The member scankeys are already in insertion format (ie, they * have sk_func = 3-way-comparison function), but we have to watch * out for nulls, which _bt_preprocess_keys didn't check. A null * in the first row member makes the condition unmatchable, just * like qual_ok = false. */ ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument); Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_flags & SK_ISNULL) return false; memcpy(scankeys + i, subkey, sizeof(ScanKeyData)); /* * If the row comparison is the last positioning key we accepted, * try to add additional keys from the lower-order row members. * (If we accepted independent conditions on additional index * columns, we use those instead --- doesn't seem worth trying to * determine which is more restrictive.) Note that this is OK * even if the row comparison is of ">" or "<" type, because the * condition applied to all but the last row member is effectively * ">=" or "<=", and so the extra keys don't break the positioning * scheme. But, by the same token, if we aren't able to use all * the row members, then the part of the row comparison that we * did use has to be treated as just a ">=" or "<=" condition, * and so we'd better adjust strat_total accordingly. */ if (i == keysCount - 1) { bool used_all_subkeys = false; Assert(!(subkey->sk_flags & SK_ROW_END)); for(;;) { subkey++; Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_attno != keysCount + 1) break; /* out-of-sequence, can't use it */ if (subkey->sk_strategy != cur->sk_strategy) break; /* wrong direction, can't use it */ if (subkey->sk_flags & SK_ISNULL) break; /* can't use null keys */ Assert(keysCount < INDEX_MAX_KEYS); memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData)); keysCount++; if (subkey->sk_flags & SK_ROW_END) { used_all_subkeys = true; break; } } if (!used_all_subkeys) { switch (strat_total) { case BTLessStrategyNumber: strat_total = BTLessEqualStrategyNumber; break; case BTGreaterStrategyNumber: strat_total = BTGreaterEqualStrategyNumber; break; } } break; /* done with outer loop */ } } else { /* * Ordinary comparison key. Transform the search-style scan key * to an insertion scan key by replacing the sk_func with the * appropriate btree comparison function. * * If scankey operator is of default subtype, we can use the * cached comparison function; otherwise gotta look it up in the * catalogs. */ if (cur->sk_subtype == InvalidOid) { FmgrInfo *procinfo; procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC); ScanKeyEntryInitializeWithInfo(scankeys + i, cur->sk_flags, cur->sk_attno, InvalidStrategy, InvalidOid, procinfo, cur->sk_argument); } else { RegProcedure cmp_proc; cmp_proc = get_opclass_proc(rel->rd_indclass->values[i], cur->sk_subtype, BTORDER_PROC); ScanKeyEntryInitialize(scankeys + i, cur->sk_flags, cur->sk_attno, InvalidStrategy, cur->sk_subtype, cmp_proc, cur->sk_argument); } } } /*---------- * Examine the selected initial-positioning strategy to determine exactly * where we need to start the scan, and set flag variables to control the * code below. * * If nextkey = false, _bt_search and _bt_binsrch will locate the first * item >= scan key. If nextkey = true, they will locate the first * item > scan key. * * If goback = true, we will then step back one item, while if * goback = false, we will start the scan on the located item. * * it's yet other place to add some code later for is(not)null ... *---------- */ switch (strat_total) { case BTLessStrategyNumber: /* * Find first item >= scankey, then back up one to arrive at last * item < scankey. (Note: this positioning strategy is only used * for a backward scan, so that is always the correct starting * position.) */ nextkey = false; goback = true; break; case BTLessEqualStrategyNumber: /* * Find first item > scankey, then back up one to arrive at last * item <= scankey. (Note: this positioning strategy is only used * for a backward scan, so that is always the correct starting * position.) */ nextkey = true; goback = true; break; case BTEqualStrategyNumber: /* * If a backward scan was specified, need to start with last equal * item not first one. */ if (ScanDirectionIsBackward(dir)) { /* * This is the same as the <= strategy. We will check at the * end whether the found item is actually =. */ nextkey = true; goback = true; } else { /* * This is the same as the >= strategy. We will check at the * end whether the found item is actually =. */ nextkey = false; goback = false; } break; case BTGreaterEqualStrategyNumber: /* * Find first item >= scankey. (This is only used for forward * scans.) */ nextkey = false; goback = false; break; case BTGreaterStrategyNumber: /* * Find first item > scankey. (This is only used for forward * scans.) */ nextkey = true; goback = false; break; default: /* can't get here, but keep compiler quiet */ elog(ERROR, "unrecognized strat_total: %d", (int) strat_total); return false; } /* * Use the manufactured insertion scan key to descend the tree and * position ourselves on the target leaf page. */ stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ); /* don't need to keep the stack around... */ _bt_freestack(stack); /* remember which buffer we have pinned, if any */ so->currPos.buf = buf; if (!BufferIsValid(buf)) { /* Only get here if index is completely empty */ return false; } /* initialize moreLeft/moreRight appropriately for scan direction */ if (ScanDirectionIsForward(dir)) { so->currPos.moreLeft = false; so->currPos.moreRight = true; } else { so->currPos.moreLeft = true; so->currPos.moreRight = false; } so->numKilled = 0; /* just paranoia */ so->markItemIndex = -1; /* ditto */ /* position to the precise item on the page */ offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey); /* * If nextkey = false, we are positioned at the first item >= scan key, or * possibly at the end of a page on which all the existing items are less * than the scan key and we know that everything on later pages is greater * than or equal to scan key. * * If nextkey = true, we are positioned at the first item > scan key, or * possibly at the end of a page on which all the existing items are less * than or equal to the scan key and we know that everything on later * pages is greater than scan key. * * The actually desired starting point is either this item or the prior * one, or in the end-of-page case it's the first item on the next page or * the last item on this page. Adjust the starting offset if needed. (If * this results in an offset before the first item or after the last one, * _bt_readpage will report no items found, and then we'll step to the * next page as needed.) */ if (goback) offnum = OffsetNumberPrev(offnum); /* * Now load data from the first page of the scan. */ if (!_bt_readpage(scan, dir, offnum)) { /* * There's no actually-matching data on this page. Try to advance to * the next page. Return false if there's no matching data at all. */ if (!_bt_steppage(scan, dir)) return false; } /* Drop the lock, but not pin, on the current page */ LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); /* OK, itemIndex says what to return */ scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid; return true; }
/* * lookup_type_cache * * Fetch the type cache entry for the specified datatype, and make sure that * all the fields requested by bits in 'flags' are valid. * * The result is never NULL --- we will elog() if the passed type OID is * invalid. Note however that we may fail to find one or more of the * requested opclass-dependent fields; the caller needs to check whether * the fields are InvalidOid or not. */ TypeCacheEntry * lookup_type_cache(Oid type_id, int flags) { TypeCacheEntry *typentry; bool found; if (TypeCacheHash == NULL) { /* First time through: initialize the hash table */ HASHCTL ctl; if (!CacheMemoryContext) CreateCacheMemoryContext(); MemSet(&ctl, 0, sizeof(ctl)); ctl.keysize = sizeof(Oid); ctl.entrysize = sizeof(TypeCacheEntry); ctl.hash = tag_hash; TypeCacheHash = hash_create("Type information cache", 64, &ctl, HASH_ELEM | HASH_FUNCTION); } /* Try to look up an existing entry */ typentry = (TypeCacheEntry *) hash_search(TypeCacheHash, (void *) &type_id, HASH_FIND, NULL); if (typentry == NULL) { /* * If we didn't find one, we want to make one. But first get the * required info from the pg_type row, just to make sure we don't * make a cache entry for an invalid type OID. */ int16 typlen; bool typbyval; char typalign; get_typlenbyvalalign(type_id, &typlen, &typbyval, &typalign); typentry = (TypeCacheEntry *) hash_search(TypeCacheHash, (void *) &type_id, HASH_ENTER, &found); if (typentry == NULL) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); Assert(!found); /* it wasn't there a moment ago */ MemSet(typentry, 0, sizeof(TypeCacheEntry)); typentry->type_id = type_id; typentry->typlen = typlen; typentry->typbyval = typbyval; typentry->typalign = typalign; } /* If we haven't already found the opclass, try to do so */ if (flags != 0 && typentry->btree_opc == InvalidOid) { typentry->btree_opc = lookup_default_opclass(type_id, BTREE_AM_OID); /* Only care about hash opclass if no btree opclass... */ if (typentry->btree_opc == InvalidOid) { if (typentry->hash_opc == InvalidOid) typentry->hash_opc = lookup_default_opclass(type_id, HASH_AM_OID); } else { /* * If we find a btree opclass where previously we only found * a hash opclass, forget the hash equality operator so we * can use the btree operator instead. */ typentry->eq_opr = InvalidOid; typentry->eq_opr_finfo.fn_oid = InvalidOid; } } /* Look for requested operators and functions */ if ((flags & (TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO)) && typentry->eq_opr == InvalidOid) { if (typentry->btree_opc != InvalidOid) typentry->eq_opr = get_opclass_member(typentry->btree_opc, BTEqualStrategyNumber); if (typentry->eq_opr == InvalidOid && typentry->hash_opc != InvalidOid) typentry->eq_opr = get_opclass_member(typentry->hash_opc, HTEqualStrategyNumber); } if ((flags & TYPECACHE_LT_OPR) && typentry->lt_opr == InvalidOid) { if (typentry->btree_opc != InvalidOid) typentry->lt_opr = get_opclass_member(typentry->btree_opc, BTLessStrategyNumber); } if ((flags & TYPECACHE_GT_OPR) && typentry->gt_opr == InvalidOid) { if (typentry->btree_opc != InvalidOid) typentry->gt_opr = get_opclass_member(typentry->btree_opc, BTGreaterStrategyNumber); } if ((flags & (TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO)) && typentry->cmp_proc == InvalidOid) { if (typentry->btree_opc != InvalidOid) typentry->cmp_proc = get_opclass_proc(typentry->btree_opc, BTORDER_PROC); } /* * Set up fmgr lookup info as requested * * Note: we tell fmgr the finfo structures live in CacheMemoryContext, * which is not quite right (they're really in DynaHashContext) but this * will do for our purposes. */ if ((flags & TYPECACHE_EQ_OPR_FINFO) && typentry->eq_opr_finfo.fn_oid == InvalidOid && typentry->eq_opr != InvalidOid) { Oid eq_opr_func; eq_opr_func = get_opcode(typentry->eq_opr); if (eq_opr_func != InvalidOid) fmgr_info_cxt(eq_opr_func, &typentry->eq_opr_finfo, CacheMemoryContext); } if ((flags & TYPECACHE_CMP_PROC_FINFO) && typentry->cmp_proc_finfo.fn_oid == InvalidOid && typentry->cmp_proc != InvalidOid) { fmgr_info_cxt(typentry->cmp_proc, &typentry->cmp_proc_finfo, CacheMemoryContext); } return typentry; }