/*
 * init_scankeys
 *
 * Fill in the first nkeys entries of scanKeys[], one per leading attribute
 * of tupleDesc (key i scans attribute i + 1).
 *
 * strategyNumbers[i] selects the strategy recorded in key i.  For
 * BTEqualStrategyNumber the comparison procedure is whatever
 * equality_oper_funcid() returns for the attribute's type.  For every
 * other strategy the procedure is the function underlying the negator of
 * the type's reverse-ordering operator.
 *
 * The comparison argument of each key is initialized to 0.
 */
static void
init_scankeys(TupleDesc tupleDesc,
			  int nkeys, ScanKey scanKeys,
			  StrategyNumber *strategyNumbers)
{
	int			i;

	Assert(nkeys <= tupleDesc->natts);

	for (i = 0; i < nkeys; i++)
	{
		StrategyNumber strategyNumber = strategyNumbers[i];
		RegProcedure cmpProc;

		Assert(strategyNumber <= BTMaxStrategyNumber &&
			   strategyNumber != InvalidStrategy);

		if (strategyNumber == BTEqualStrategyNumber)
		{
			cmpProc = equality_oper_funcid(tupleDesc->attrs[i]->atttypid);
		}
		else
		{
			Oid			reverseOp;
			Oid			negatedOp;

			reverseOp = reverse_ordering_oper_opid(tupleDesc->attrs[i]->atttypid);
			negatedOp = get_negator(reverseOp);
			cmpProc = get_opcode(negatedOp);
		}

		/*
		 * Both cases build the key identically apart from the procedure;
		 * in the equality case strategyNumber is BTEqualStrategyNumber.
		 */
		ScanKeyEntryInitialize(&scanKeys[i],
							   0,				/* sk_flag */
							   i + 1,			/* attribute number to scan */
							   strategyNumber,	/* strategy */
							   InvalidOid,		/* strategy subtype */
							   cmpProc,			/* reg proc to use */
							   0				/* constant */
			);
	}
}
/* *-------------------------------------------------------------- * Async_UnlistenAll * * Unlisten all relations for this backend. * * This is invoked by UNLISTEN "*" command, and also at backend exit. * * Results: * XXX * * Side effects: * pg_listener is updated. * *-------------------------------------------------------------- */ static void Async_UnlistenAll(void) { Relation lRel; TupleDesc tdesc; HeapScanDesc scan; HeapTuple lTuple; ScanKeyData key[1]; if (Trace_notify) elog(DEBUG1, "Async_UnlistenAll"); lRel = heap_openr(ListenerRelationName, ExclusiveLock); tdesc = RelationGetDescr(lRel); /* Find and delete all entries with my listenerPID */ ScanKeyEntryInitialize(&key[0], 0, Anum_pg_listener_pid, F_INT4EQ, Int32GetDatum(MyProcPid)); scan = heap_beginscan(lRel, SnapshotNow, 1, key); while ((lTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) simple_heap_delete(lRel, &lTuple->t_self); heap_endscan(scan); heap_close(lRel, ExclusiveLock); }
/* ----------
 * toast_delete_datum -
 *
 *	Remove all chunks belonging to one externally stored value from its
 *	toast relation.  A value that is not stored externally is left alone.
 * ----------
 */
static void
toast_delete_datum(Relation rel, Datum value)
{
	varattrib  *attr = (varattrib *) DatumGetPointer(value);
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData valueKey;
	IndexScanDesc chunkScan;
	HeapTuple	chunkTuple;

	/* Only external values have anything to delete */
	if (!VARATT_IS_EXTERNAL(attr))
		return;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
						 RowExclusiveLock);
	toastidx = index_open(toastrel->rd_rel->reltoastidxid);

	/*
	 * Build a single-column key matching the value id.  The order in which
	 * the chunks come back is irrelevant for deletion.
	 */
	ScanKeyEntryInitialize(&valueKey,
						   (bits16) 0,
						   (AttrNumber) 1,
						   (RegProcedure) F_OIDEQ,
						   ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

	/*
	 * Walk the index and delete each chunk tuple as it is found
	 */
	chunkScan = index_beginscan(toastrel, toastidx, SnapshotToast,
								1, &valueKey);
	for (;;)
	{
		chunkTuple = index_getnext(chunkScan, ForwardScanDirection);
		if (chunkTuple == NULL)
			break;

		simple_heap_delete(toastrel, &chunkTuple->t_self);
	}

	/*
	 * End scan and close relations
	 */
	index_endscan(chunkScan);
	index_close(toastidx);
	heap_close(toastrel, RowExclusiveLock);
}
/* ----------
 * toast_fetch_datum_slice -
 *
 *	Reconstruct a segment of a varattrib from the chunks saved
 *	in the toast relation
 *
 *	attr		external value to read from
 *	sliceoffset	byte offset of the first byte wanted
 *	length		number of bytes wanted; a negative value, or one that
 *				would run past the end of the value, means "up to the end"
 *
 *	Returns a freshly palloc'd varattrib holding the requested bytes.  A
 *	sliceoffset at or beyond the end of the value yields an empty result.
 *	Raises ERROR if the chunks found do not match what was expected.
 * ----------
 */
static varattrib *
toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
{
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData toastkey[3];
	int			nscankeys;
	IndexScanDesc toastscan;
	HeapTuple	ttup;
	TupleDesc	toasttupDesc;
	varattrib  *result;
	int32		attrsize;
	int32		residx;
	int32		nextidx;
	int			numchunks;
	int			startchunk;
	int			endchunk;
	int32		startoffset;
	int32		endoffset;
	int			totalchunks;
	Pointer		chunk;
	bool		isnull;
	int32		chunksize;
	int32		chcpystrt;
	int32		chcpyend;

	attrsize = attr->va_content.va_external.va_extsize;
	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

	if (sliceoffset >= attrsize)
	{
		sliceoffset = 0;
		length = 0;
	}

	/*
	 * Clamp the request to the end of the value.  The comparison is written
	 * as "length > attrsize - sliceoffset" rather than
	 * "sliceoffset + length > attrsize" so that a very large length cannot
	 * overflow the signed addition.
	 */
	if (length < 0 || length > attrsize - sliceoffset)
		length = attrsize - sliceoffset;

	result = (varattrib *) palloc(length + VARHDRSZ);
	VARATT_SIZEP(result) = length + VARHDRSZ;

	if (VARATT_IS_COMPRESSED(attr))
		VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;

	if (length == 0)
		return (result);		/* Can save a lot of work at this point! */

	/* Work out which chunks cover the slice, and where it starts/ends */
	startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
	endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
	numchunks = (endchunk - startchunk) + 1;

	startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
	endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
						 AccessShareLock);
	toasttupDesc = toastrel->rd_att;
	toastidx = index_open(toastrel->rd_rel->reltoastidxid);

	/*
	 * Setup a scan key to fetch from the index. This is either two keys
	 * or three depending on the number of chunks.
	 */
	ScanKeyEntryInitialize(&toastkey[0],
						   (bits16) 0,
						   (AttrNumber) 1,
						   (RegProcedure) F_OIDEQ,
						   ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

	/*
	 * Now dependent on number of chunks:
	 */
	if (numchunks == 1)
	{
		/* A single chunk: match its sequence number exactly */
		ScanKeyEntryInitialize(&toastkey[1],
							   (bits16) 0,
							   (AttrNumber) 2,
							   (RegProcedure) F_INT4EQ,
							   Int32GetDatum(startchunk));
		nscankeys = 2;
	}
	else
	{
		/* Several chunks: startchunk <= sequence number <= endchunk */
		ScanKeyEntryInitialize(&toastkey[1],
							   (bits16) 0,
							   (AttrNumber) 2,
							   (RegProcedure) F_INT4GE,
							   Int32GetDatum(startchunk));
		ScanKeyEntryInitialize(&toastkey[2],
							   (bits16) 0,
							   (AttrNumber) 2,
							   (RegProcedure) F_INT4LE,
							   Int32GetDatum(endchunk));
		nscankeys = 3;
	}

	/*
	 * Read the chunks by index
	 *
	 * The index is on (valueid, chunkidx) so they will come in order
	 */
	nextidx = startchunk;

	toastscan = index_beginscan(toastrel, toastidx, SnapshotToast,
								nscankeys, toastkey);
	while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
	{
		/*
		 * Have a chunk, extract the sequence number and the data
		 */
		residx = DatumGetInt32(heap_getattr(ttup, 2, toasttupDesc, &isnull));
		Assert(!isnull);
		chunk = DatumGetPointer(heap_getattr(ttup, 3, toasttupDesc, &isnull));
		Assert(!isnull);
		chunksize = VARATT_SIZE(chunk) - VARHDRSZ;

		/*
		 * Some checks on the data we've found
		 */
		if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
			elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
				 residx, nextidx,
				 attr->va_content.va_external.va_valueid);
		if (residx < totalchunks - 1)
		{
			/* Every chunk but the value's last one must be full-sized */
			if (chunksize != TOAST_MAX_CHUNK_SIZE)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}
		else
		{
			/* The value's last chunk must end exactly at attrsize */
			if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}

		/*
		 * Copy the data into proper place in our result.  Only the first
		 * and last chunks of the slice may be copied partially.
		 */
		chcpystrt = 0;
		chcpyend = chunksize - 1;
		if (residx == startchunk)
			chcpystrt = startoffset;
		if (residx == endchunk)
			chcpyend = endoffset;

		memcpy(((char *) VARATT_DATA(result)) +
			   (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
			   VARATT_DATA(chunk) + chcpystrt,
			   (chcpyend - chcpystrt) + 1);

		nextidx++;
	}

	/*
	 * Final checks that we successfully fetched the datum
	 */
	if (nextidx != (endchunk + 1))
		elog(ERROR, "missing chunk number %d for toast value %u",
			 nextidx,
			 attr->va_content.va_external.va_valueid);

	/*
	 * End scan and close relations
	 */
	index_endscan(toastscan);
	index_close(toastidx);
	heap_close(toastrel, AccessShareLock);

	return result;
}
/* ----------
 * toast_fetch_datum -
 *
 *	Rebuild a complete in-memory varattrib by concatenating all of the
 *	chunks stored for it in the toast relation.  Raises ERROR if a chunk
 *	is missing, out of sequence, or of an unexpected size.
 * ----------
 */
static varattrib *
toast_fetch_datum(varattrib *attr)
{
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData valueKey;
	IndexScanDesc chunkScan;
	HeapTuple	chunkTuple;
	TupleDesc	toasttupDesc;
	varattrib  *result;
	int32		ressize;
	int32		residx;
	int32		nextidx;
	int32		numchunks;
	Pointer		chunk;
	bool		isnull;
	int32		chunksize;

	/* Size the result from the external header and allocate it up front */
	ressize = attr->va_content.va_external.va_extsize;
	numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

	result = (varattrib *) palloc(ressize + VARHDRSZ);
	VARATT_SIZEP(result) = ressize + VARHDRSZ;
	if (VARATT_IS_COMPRESSED(attr))
		VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
						 AccessShareLock);
	toasttupDesc = toastrel->rd_att;
	toastidx = index_open(toastrel->rd_rel->reltoastidxid);

	/*
	 * The scan key matches on va_valueid only
	 */
	ScanKeyEntryInitialize(&valueKey,
						   (bits16) 0,
						   (AttrNumber) 1,
						   (RegProcedure) F_OIDEQ,
						   ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

	/*
	 * Read the chunks by index
	 *
	 * The index is on (valueid, chunkidx), so the chunks arrive in
	 * chunkidx order even though the key does not ask for that.
	 */
	chunkScan = index_beginscan(toastrel, toastidx, SnapshotToast,
								1, &valueKey);
	for (nextidx = 0;; nextidx++)
	{
		chunkTuple = index_getnext(chunkScan, ForwardScanDirection);
		if (chunkTuple == NULL)
			break;

		/*
		 * Column 2 is the chunk sequence number, column 3 the chunk data
		 */
		residx = DatumGetInt32(heap_getattr(chunkTuple, 2, toasttupDesc, &isnull));
		Assert(!isnull);
		chunk = DatumGetPointer(heap_getattr(chunkTuple, 3, toasttupDesc, &isnull));
		Assert(!isnull);
		chunksize = VARATT_SIZE(chunk) - VARHDRSZ;

		/*
		 * Sanity-check the sequence number and the chunk size
		 */
		if (residx != nextidx)
			elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
				 residx, nextidx,
				 attr->va_content.va_external.va_valueid);

		if (residx < numchunks - 1)
		{
			/* any chunk before the last must be exactly full */
			if (chunksize != TOAST_MAX_CHUNK_SIZE)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}
		else if (residx < numchunks)
		{
			/* the last chunk must finish exactly at ressize */
			if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}
		else
		{
			/* more chunks than the recorded size allows */
			elog(ERROR, "unexpected chunk number %d for toast value %u",
				 residx,
				 attr->va_content.va_external.va_valueid);
		}

		/*
		 * Place the chunk at its offset within the result
		 */
		memcpy(((char *) VARATT_DATA(result)) + residx * TOAST_MAX_CHUNK_SIZE,
			   VARATT_DATA(chunk),
			   chunksize);
	}

	/*
	 * Make sure the scan delivered every chunk
	 */
	if (nextidx != numchunks)
		elog(ERROR, "missing chunk number %d for toast value %u",
			 nextidx,
			 attr->va_content.va_external.va_valueid);

	/*
	 * End scan and close relations
	 */
	index_endscan(chunkScan);
	index_close(toastidx);
	heap_close(toastrel, AccessShareLock);

	return result;
}
/* * -------------------------------------------------------------- * ProcessIncomingNotify * * Deal with arriving NOTIFYs from other backends. * This is called either directly from the SIGUSR2 signal handler, * or the next time control reaches the outer idle loop. * Scan pg_listener for arriving notifies, report them to my front end, * and clear the notification field in pg_listener until next time. * * NOTE: since we are outside any transaction, we must create our own. * * Results: * XXX * * -------------------------------------------------------------- */ static void ProcessIncomingNotify(void) { Relation lRel; TupleDesc tdesc; ScanKeyData key[1]; HeapScanDesc scan; HeapTuple lTuple, rTuple; Datum value[Natts_pg_listener]; char repl[Natts_pg_listener], nulls[Natts_pg_listener]; if (Trace_notify) elog(DEBUG1, "ProcessIncomingNotify"); set_ps_display("async_notify"); notifyInterruptOccurred = 0; StartTransactionCommand(); lRel = heap_openr(ListenerRelationName, ExclusiveLock); tdesc = RelationGetDescr(lRel); /* Scan only entries with my listenerPID */ ScanKeyEntryInitialize(&key[0], 0, Anum_pg_listener_pid, F_INT4EQ, Int32GetDatum(MyProcPid)); scan = heap_beginscan(lRel, SnapshotNow, 1, key); /* Prepare data for rewriting 0 into notification field */ nulls[0] = nulls[1] = nulls[2] = ' '; repl[0] = repl[1] = repl[2] = ' '; repl[Anum_pg_listener_notify - 1] = 'r'; value[0] = value[1] = value[2] = (Datum) 0; value[Anum_pg_listener_notify - 1] = Int32GetDatum(0); while ((lTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_listener listener = (Form_pg_listener) GETSTRUCT(lTuple); char *relname = NameStr(listener->relname); int32 sourcePID = listener->notification; if (sourcePID != 0) { /* Notify the frontend */ if (Trace_notify) elog(DEBUG1, "ProcessIncomingNotify: received %s from %d", relname, (int) sourcePID); NotifyMyFrontEnd(relname, sourcePID); /* * Rewrite the tuple with 0 in notification column. 
* * simple_heap_update is safe here because no one else would * have tried to UNLISTEN us, so there can be no uncommitted * changes. */ rTuple = heap_modifytuple(lTuple, lRel, value, nulls, repl); simple_heap_update(lRel, &lTuple->t_self, rTuple); #ifdef NOT_USED /* currently there are no indexes */ CatalogUpdateIndexes(lRel, rTuple); #endif } } heap_endscan(scan); /* * We do NOT release the lock on pg_listener here; we need to hold it * until end of transaction (which is about to happen, anyway) to * ensure that other backends see our tuple updates when they look. * Otherwise, a transaction started after this one might mistakenly * think it doesn't need to send this backend a new NOTIFY. */ heap_close(lRel, NoLock); CommitTransactionCommand(); /* * Must flush the notify messages to ensure frontend gets them * promptly. */ pq_flush(); set_ps_display("idle"); if (Trace_notify) elog(DEBUG1, "ProcessIncomingNotify: done"); }
/* * regtypein - converts "typename" to type OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_type entry. * * In bootstrap mode the name must just equal some existing name in pg_type. * In normal mode the type name can be specified using the full type syntax * recognized by the parser; for example, DOUBLE PRECISION and INTEGER[] will * work and be translated to the correct type names. (We ignore any typmod * info generated by the parser, however.) */ Datum regtypein(PG_FUNCTION_ARGS) { char *typ_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; int32 typmod; /* '-' ? */ if (strcmp(typ_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (typ_name_or_oid[0] >= '0' && typ_name_or_oid[0] <= '9' && strspn(typ_name_or_oid, "0123456789") == strlen(typ_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(typ_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a type name, possibly schema-qualified or decorated */ /* * In bootstrap mode we assume the given name is not schema-qualified, * and just search pg_type for a match. This is needed for * initializing other system catalogs (pg_namespace may not exist yet, * and certainly there are no schemas other than pg_catalog). 
*/ if (IsBootstrapProcessingMode()) { Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyEntryInitialize(&skey[0], 0x0, (AttrNumber) Anum_pg_type_typname, (RegProcedure) F_NAMEEQ, CStringGetDatum(typ_name_or_oid)); hdesc = heap_openr(TypeRelationName, AccessShareLock); sysscan = systable_beginscan(hdesc, TypeNameNspIndex, true, SnapshotNow, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); else ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("type \"%s\" does not exist", typ_name_or_oid))); /* We assume there can be only one match */ systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); PG_RETURN_OID(result); } /* * Normal case: invoke the full parser to deal with special cases such * as array syntax. */ parseTypeString(typ_name_or_oid, &result, &typmod); PG_RETURN_OID(result); }
/* ----------------
 *		UpdateStats
 *
 * Store fresh relpages and reltuples statistics in pg_class for the given
 * relation, which may be either a table or an index.  relpages is measured
 * here; reltuples is supplied by the caller, and a value of 0 is replaced
 * by an estimate (see below).  Note that this is not used in the context
 * of VACUUM.
 * ----------------
 */
void
UpdateStats(Oid relid, double reltuples)
{
	Relation	whichRel;
	Relation	pg_class;
	HeapTuple	tuple;
	BlockNumber relpages;
	Form_pg_class rd_rel;
	HeapScanDesc pg_class_scan = NULL;
	bool		in_place_upd;
	bool		stats_changed;

	/*
	 * This routine handles updates for both the heap and index relation
	 * statistics.  The index relation tuple was created in the current
	 * transaction, so bump the command counter to be sure we can *see* it.
	 */
	CommandCounterIncrement();

	/*
	 * CommandCounterIncrement() flushes invalid cache entries, including
	 * those for the heap and index relations for which we're updating
	 * statistics.  With the cache flushed it is safe to open the relation
	 * again, which we need in order to count its blocks.
	 *
	 * Grabbing lock here is probably redundant ...
	 */
	whichRel = relation_open(relid, ShareLock);

	/*
	 * Find the tuple to update in pg_class.  Normally we make a copy of
	 * the tuple using the syscache, modify it, and apply heap_update.
	 * But in bootstrap mode we can't use heap_update, so we cheat and
	 * overwrite the tuple in-place.
	 *
	 * We also must cheat if reindexing pg_class itself, because the
	 * target index may presently not be part of the set of indexes that
	 * CatalogUpdateIndexes would update (see reindex_relation).  In this
	 * case the stats updates will not be WAL-logged and so could be lost
	 * in a crash.  This seems OK considering VACUUM does the same thing.
	 */
	pg_class = heap_openr(RelationRelationName, RowExclusiveLock);

	in_place_upd = IsBootstrapProcessingMode() ||
		ReindexIsProcessingHeap(RelationGetRelid(pg_class));

	if (in_place_upd)
	{
		/* Locate the live tuple with a heap scan on the OID column */
		ScanKeyData key[1];

		ScanKeyEntryInitialize(&key[0], 0,
							   ObjectIdAttributeNumber,
							   F_OIDEQ,
							   ObjectIdGetDatum(relid));

		pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
	}
	else
	{
		/* Work on a private copy obtained from the syscache */
		tuple = SearchSysCacheCopy(RELOID,
								   ObjectIdGetDatum(relid),
								   0, 0, 0);
	}

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for relation %u", relid);
	rd_rel = (Form_pg_class) GETSTRUCT(tuple);

	/*
	 * Figure values to insert.
	 *
	 * If we found zero tuples in the scan, do NOT believe it; instead put a
	 * bogus estimate into the statistics fields.  Otherwise, the common
	 * pattern "CREATE TABLE; CREATE INDEX; insert data" leaves the table
	 * with zero size statistics until a VACUUM is done.  The optimizer
	 * will generate very bad plans if the stats claim the table is empty
	 * when it is actually sizable.  See also CREATE TABLE in heap.c.
	 *
	 * Note: this path is also taken during bootstrap, because bootstrap.c
	 * passes reltuples = 0 after loading a table.  We have to estimate
	 * some number for reltuples based on the actual number of pages.
	 */
	relpages = RelationGetNumberOfBlocks(whichRel);

	if (reltuples == 0)
	{
		if (relpages == 0)
		{
			/* Bogus defaults for a virgin table, same as heap.c */
			reltuples = 1000;
			relpages = 10;
		}
		else if (whichRel->rd_rel->relkind == RELKIND_INDEX && relpages <= 2)
		{
			/* Empty index, leave bogus defaults in place */
			reltuples = 1000;
		}
		else
		{
			/* Scale a per-page tuple estimate by the real page count */
			reltuples = ((double) relpages) *
				NTUPLES_PER_PAGE(whichRel->rd_rel->relnatts);
		}
	}

	/*
	 * Update statistics in pg_class, if they changed.  (Avoiding an
	 * unnecessary update is not just a tiny performance improvement; it
	 * also reduces the window wherein concurrent CREATE INDEX commands
	 * may conflict.)
	 */
	stats_changed = (rd_rel->relpages != (int32) relpages ||
					 rd_rel->reltuples != (float4) reltuples);

	if (stats_changed && in_place_upd)
	{
		/* Bootstrap or reindex case: overwrite fields in place. */
		LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_EXCLUSIVE);
		rd_rel->relpages = (int32) relpages;
		rd_rel->reltuples = (float4) reltuples;
		LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
		WriteNoReleaseBuffer(pg_class_scan->rs_cbuf);
		if (!IsBootstrapProcessingMode())
			CacheInvalidateHeapTuple(pg_class, tuple);
	}
	else if (stats_changed)
	{
		/* During normal processing, must work harder. */
		rd_rel->relpages = (int32) relpages;
		rd_rel->reltuples = (float4) reltuples;
		simple_heap_update(pg_class, &tuple->t_self, tuple);
		CatalogUpdateIndexes(pg_class, tuple);
	}

	/* Release whichever of the scan or the tuple copy we acquired */
	if (pg_class_scan != NULL)
		heap_endscan(pg_class_scan);
	else
		heap_freetuple(tuple);

	/*
	 * We shouldn't have to do this, but we do...  Modify the reldesc in
	 * place with the new values so that the cache contains the latest
	 * copy.  (XXX is this really still necessary?  The relcache will get
	 * fixed at next CommandCounterIncrement, so why bother here?)
	 */
	whichRel->rd_rel->relpages = (int32) relpages;
	whichRel->rd_rel->reltuples = (float4) reltuples;

	heap_close(pg_class, RowExclusiveLock);
	relation_close(whichRel, NoLock);
}
/* * _bt_first() -- Find the first item in a scan. * * We need to be clever about the direction of scan, the search * conditions, and the tree ordering. We find the first item (or, * if backwards scan, the last item) in the tree that satisfies the * qualifications in the scan key. On success exit, the page containing * the current index tuple is pinned but not locked, and data about * the matching tuple(s) on the page has been loaded into so->currPos. * scan->xs_ctup.t_self is set to the heap TID of the current tuple, * and if requested, scan->xs_itup points to a copy of the index tuple. * * If there are no matching items in the index, we return FALSE, with no * pins or locks held. * * Note that scan->keyData[], and the so->keyData[] scankey built from it, * are both search-type scankeys (see nbtree/README for more about this). * Within this routine, we build a temporary insertion-type scankey to use * in locating the scan start position. */ bool _bt_first(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Buffer buf; BTStack stack; OffsetNumber offnum; StrategyNumber strat; bool nextkey; bool goback; ScanKey startKeys[INDEX_MAX_KEYS]; ScanKeyData scankeys[INDEX_MAX_KEYS]; ScanKeyData notnullkeys[INDEX_MAX_KEYS]; int keysCount = 0; int i; StrategyNumber strat_total; BTScanPosItem *currItem; pgstat_count_index_scan(rel); /* * Examine the scan keys and eliminate any redundant keys; also mark the * keys that must be matched to continue the scan. */ _bt_preprocess_keys(scan); /* * Quit now if _bt_preprocess_keys() discovered that the scan keys can * never be satisfied (eg, x == 1 AND x > 2). */ if (!so->qual_ok) return false; /*---------- * Examine the scan keys to discover where we need to start the scan. * * We want to identify the keys that can be used as starting boundaries; * these are =, >, or >= keys for a forward scan or =, <, <= keys for * a backwards scan. 
We can use keys for multiple attributes so long as * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept * a > or < boundary or find an attribute with no boundary (which can be * thought of as the same as "> -infinity"), we can't use keys for any * attributes to its right, because it would break our simplistic notion * of what initial positioning strategy to use. * * When the scan keys include cross-type operators, _bt_preprocess_keys * may not be able to eliminate redundant keys; in such cases we will * arbitrarily pick a usable one for each attribute. This is correct * but possibly not optimal behavior. (For example, with keys like * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when * x=5 would be more efficient.) Since the situation only arises given * a poorly-worded query plus an incomplete opfamily, live with it. * * When both equality and inequality keys appear for a single attribute * (again, only possible when cross-type operators appear), we *must* * select one of the equality keys for the starting point, because * _bt_checkkeys() will stop the scan as soon as an equality qual fails. * For example, if we have keys like "x >= 4 AND x = 10" and we elect to * start at x=4, we will fail and stop before reaching x=10. If multiple * equality quals survive preprocessing, however, it doesn't matter which * one we use --- by definition, they are either redundant or * contradictory. * * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier. * If the index stores nulls at the end of the index we'll be starting * from, and we have no boundary key for the column (which means the key * we deduced NOT NULL from is an inequality key that constrains the other * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to * use as a boundary key. If we didn't do this, we might find ourselves * traversing a lot of null entries at the start of the scan. 
* * In this loop, row-comparison keys are treated the same as keys on their * first (leftmost) columns. We'll add on lower-order columns of the row * comparison below, if possible. * * The selected scan keys (at most one per index column) are remembered by * storing their addresses into the local startKeys[] array. *---------- */ strat_total = BTEqualStrategyNumber; if (so->numberOfKeys > 0) { AttrNumber curattr; ScanKey chosen; ScanKey impliesNN; ScanKey cur; /* * chosen is the so-far-chosen key for the current attribute, if any. * We don't cast the decision in stone until we reach keys for the * next attribute. */ curattr = 1; chosen = NULL; /* Also remember any scankey that implies a NOT NULL constraint */ impliesNN = NULL; /* * Loop iterates from 0 to numberOfKeys inclusive; we use the last * pass to handle after-last-key processing. Actual exit from the * loop is at one of the "break" statements below. */ for (cur = so->keyData, i = 0;; cur++, i++) { if (i >= so->numberOfKeys || cur->sk_attno != curattr) { /* * Done looking at keys for curattr. If we didn't find a * usable boundary key, see if we can deduce a NOT NULL key. */ if (chosen == NULL && impliesNN != NULL && ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? ScanDirectionIsForward(dir) : ScanDirectionIsBackward(dir))) { /* Yes, so build the key in notnullkeys[keysCount] */ chosen = ¬nullkeys[keysCount]; ScanKeyEntryInitialize(chosen, (SK_SEARCHNOTNULL | SK_ISNULL | (impliesNN->sk_flags & (SK_BT_DESC | SK_BT_NULLS_FIRST))), curattr, ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? BTGreaterStrategyNumber : BTLessStrategyNumber), InvalidOid, InvalidOid, InvalidOid, (Datum) 0); } /* * If we still didn't find a usable boundary key, quit; else * save the boundary key pointer in startKeys. */ if (chosen == NULL) break; startKeys[keysCount++] = chosen; /* * Adjust strat_total, and quit if we have stored a > or < * key. 
*/ strat = chosen->sk_strategy; if (strat != BTEqualStrategyNumber) { strat_total = strat; if (strat == BTGreaterStrategyNumber || strat == BTLessStrategyNumber) break; } /* * Done if that was the last attribute, or if next key is not * in sequence (implying no boundary key is available for the * next attribute). */ if (i >= so->numberOfKeys || cur->sk_attno != curattr + 1) break; /* * Reset for next attr. */ curattr = cur->sk_attno; chosen = NULL; impliesNN = NULL; } /* * Can we use this key as a starting boundary for this attr? * * If not, does it imply a NOT NULL constraint? (Because * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber, * *any* inequality key works for that; we need not test.) */ switch (cur->sk_strategy) { case BTLessStrategyNumber: case BTLessEqualStrategyNumber: if (chosen == NULL) { if (ScanDirectionIsBackward(dir)) chosen = cur; else impliesNN = cur; } break; case BTEqualStrategyNumber: /* override any non-equality choice */ chosen = cur; break; case BTGreaterEqualStrategyNumber: case BTGreaterStrategyNumber: if (chosen == NULL) { if (ScanDirectionIsForward(dir)) chosen = cur; else impliesNN = cur; } break; } } } /* * If we found no usable boundary keys, we have to start from one end of * the tree. Walk down that edge to the first or last key, and scan from * there. */ if (keysCount == 0) return _bt_endpoint(scan, dir); /* * We want to start the scan somewhere within the index. Set up an * insertion scankey we can use to search for the boundary point we * identified above. The insertion scankey is built in the local * scankeys[] array, using the keys identified by startKeys[]. */ Assert(keysCount <= INDEX_MAX_KEYS); for (i = 0; i < keysCount; i++) { ScanKey cur = startKeys[i]; Assert(cur->sk_attno == i + 1); if (cur->sk_flags & SK_ROW_HEADER) { /* * Row comparison header: look to the first row member instead. 
* * The member scankeys are already in insertion format (ie, they * have sk_func = 3-way-comparison function), but we have to watch * out for nulls, which _bt_preprocess_keys didn't check. A null * in the first row member makes the condition unmatchable, just * like qual_ok = false. */ ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument); Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_flags & SK_ISNULL) return false; memcpy(scankeys + i, subkey, sizeof(ScanKeyData)); /* * If the row comparison is the last positioning key we accepted, * try to add additional keys from the lower-order row members. * (If we accepted independent conditions on additional index * columns, we use those instead --- doesn't seem worth trying to * determine which is more restrictive.) Note that this is OK * even if the row comparison is of ">" or "<" type, because the * condition applied to all but the last row member is effectively * ">=" or "<=", and so the extra keys don't break the positioning * scheme. But, by the same token, if we aren't able to use all * the row members, then the part of the row comparison that we * did use has to be treated as just a ">=" or "<=" condition, and * so we'd better adjust strat_total accordingly. 
*/ if (i == keysCount - 1) { bool used_all_subkeys = false; Assert(!(subkey->sk_flags & SK_ROW_END)); for (;;) { subkey++; Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_attno != keysCount + 1) break; /* out-of-sequence, can't use it */ if (subkey->sk_strategy != cur->sk_strategy) break; /* wrong direction, can't use it */ if (subkey->sk_flags & SK_ISNULL) break; /* can't use null keys */ Assert(keysCount < INDEX_MAX_KEYS); memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData)); keysCount++; if (subkey->sk_flags & SK_ROW_END) { used_all_subkeys = true; break; } } if (!used_all_subkeys) { switch (strat_total) { case BTLessStrategyNumber: strat_total = BTLessEqualStrategyNumber; break; case BTGreaterStrategyNumber: strat_total = BTGreaterEqualStrategyNumber; break; } } break; /* done with outer loop */ } } else { /* * Ordinary comparison key. Transform the search-style scan key * to an insertion scan key by replacing the sk_func with the * appropriate btree comparison function. * * If scankey operator is not a cross-type comparison, we can use * the cached comparison function; otherwise gotta look it up in * the catalogs. (That can't lead to infinite recursion, since no * indexscan initiated by syscache lookup will use cross-data-type * operators.) * * We support the convention that sk_subtype == InvalidOid means * the opclass input type; this is a hack to simplify life for * ScanKeyInit(). 
*/ if (cur->sk_subtype == rel->rd_opcintype[i] || cur->sk_subtype == InvalidOid) { FmgrInfo *procinfo; procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC); ScanKeyEntryInitializeWithInfo(scankeys + i, cur->sk_flags, cur->sk_attno, InvalidStrategy, cur->sk_subtype, cur->sk_collation, procinfo, cur->sk_argument); } else { RegProcedure cmp_proc; cmp_proc = get_opfamily_proc(rel->rd_opfamily[i], rel->rd_opcintype[i], cur->sk_subtype, BTORDER_PROC); if (!RegProcedureIsValid(cmp_proc)) elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"", BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype, cur->sk_attno, RelationGetRelationName(rel)); ScanKeyEntryInitialize(scankeys + i, cur->sk_flags, cur->sk_attno, InvalidStrategy, cur->sk_subtype, cur->sk_collation, cmp_proc, cur->sk_argument); } } } /*---------- * Examine the selected initial-positioning strategy to determine exactly * where we need to start the scan, and set flag variables to control the * code below. * * If nextkey = false, _bt_search and _bt_binsrch will locate the first * item >= scan key. If nextkey = true, they will locate the first * item > scan key. * * If goback = true, we will then step back one item, while if * goback = false, we will start the scan on the located item. *---------- */ switch (strat_total) { case BTLessStrategyNumber: /* * Find first item >= scankey, then back up one to arrive at last * item < scankey. (Note: this positioning strategy is only used * for a backward scan, so that is always the correct starting * position.) */ nextkey = false; goback = true; break; case BTLessEqualStrategyNumber: /* * Find first item > scankey, then back up one to arrive at last * item <= scankey. (Note: this positioning strategy is only used * for a backward scan, so that is always the correct starting * position.) 
*/ nextkey = true; goback = true; break; case BTEqualStrategyNumber: /* * If a backward scan was specified, need to start with last equal * item not first one. */ if (ScanDirectionIsBackward(dir)) { /* * This is the same as the <= strategy. We will check at the * end whether the found item is actually =. */ nextkey = true; goback = true; } else { /* * This is the same as the >= strategy. We will check at the * end whether the found item is actually =. */ nextkey = false; goback = false; } break; case BTGreaterEqualStrategyNumber: /* * Find first item >= scankey. (This is only used for forward * scans.) */ nextkey = false; goback = false; break; case BTGreaterStrategyNumber: /* * Find first item > scankey. (This is only used for forward * scans.) */ nextkey = true; goback = false; break; default: /* can't get here, but keep compiler quiet */ elog(ERROR, "unrecognized strat_total: %d", (int) strat_total); return false; } /* * Use the manufactured insertion scan key to descend the tree and * position ourselves on the target leaf page. */ stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ); /* don't need to keep the stack around... */ _bt_freestack(stack); /* remember which buffer we have pinned, if any */ so->currPos.buf = buf; if (!BufferIsValid(buf)) { /* * We only get here if the index is completely empty. Lock relation * because nothing finer to lock exists. 
*/ PredicateLockRelation(rel, scan->xs_snapshot); return false; } else PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot); /* initialize moreLeft/moreRight appropriately for scan direction */ if (ScanDirectionIsForward(dir)) { so->currPos.moreLeft = false; so->currPos.moreRight = true; } else { so->currPos.moreLeft = true; so->currPos.moreRight = false; } so->numKilled = 0; /* just paranoia */ so->markItemIndex = -1; /* ditto */ /* position to the precise item on the page */ offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey); /* * If nextkey = false, we are positioned at the first item >= scan key, or * possibly at the end of a page on which all the existing items are less * than the scan key and we know that everything on later pages is greater * than or equal to scan key. * * If nextkey = true, we are positioned at the first item > scan key, or * possibly at the end of a page on which all the existing items are less * than or equal to the scan key and we know that everything on later * pages is greater than scan key. * * The actually desired starting point is either this item or the prior * one, or in the end-of-page case it's the first item on the next page or * the last item on this page. Adjust the starting offset if needed. (If * this results in an offset before the first item or after the last one, * _bt_readpage will report no items found, and then we'll step to the * next page as needed.) */ if (goback) offnum = OffsetNumberPrev(offnum); /* * Now load data from the first page of the scan. */ if (!_bt_readpage(scan, dir, offnum)) { /* * There's no actually-matching data on this page. Try to advance to * the next page. Return false if there's no matching data at all. 
*/ if (!_bt_steppage(scan, dir)) return false; } /* Drop the lock, but not pin, on the current page */ LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; scan->xs_ctup.t_self = currItem->heapTid; if (scan->xs_want_itup) scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); return true; }
/*
 * _bitmap_init_buildstate() -- initialize the build state before building
 *		a bitmap index.
 *
 * index:   the bitmap index relation being built
 * bmstate: build state to fill in; every field touched here is overwritten
 *
 * The routine
 *	 - allocates and resets the TID-location buffer,
 *	 - reads the metapage and calls _bitmap_open_lov_heapandindex() to fill
 *	   bm_lov_heap / bm_lov_index,
 *	 - tries to set up a hash table keyed on the index attributes.  That
 *	   needs an equality operator with hash support for every attribute; if
 *	   any attribute lacks one, cur_bmbuild is reset to NULL and an index
 *	   scan on the LOV index with one equality scan key per attribute is
 *	   started instead,
 *	 - decides whether the build is WAL-logged.
 *
 * cur_bmbuild is not declared in this function; it is assumed to be a
 * file-level variable shared with the hash callbacks.
 */
void
_bitmap_init_buildstate(Relation index, BMBuildState *bmstate)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	BMMetaPage	mp;
	HASHCTL		hash_ctl;
	int			hash_flags;
	int			i;
	Buffer		metabuf;

	/* initialize the build state */
	bmstate->bm_tupDesc = RelationGetDescr(index);
	bmstate->bm_tidLocsBuffer = (BMTidBuildBuf *)
		palloc(sizeof(BMTidBuildBuf));
	bmstate->bm_tidLocsBuffer->byte_size = 0;
	bmstate->bm_tidLocsBuffer->lov_blocks = NIL;
	bmstate->bm_tidLocsBuffer->max_lov_block = InvalidBlockNumber;

	/* -------- MirroredLock ---------- */
	MIRROREDLOCK_BUFMGR_LOCK;

	metabuf = _bitmap_getbuf(index, BM_METAPAGE, BM_READ);
	mp = _bitmap_get_metapage_data(index, metabuf);
	_bitmap_open_lov_heapandindex(index, mp, &(bmstate->bm_lov_heap),
								  &(bmstate->bm_lov_index),
								  RowExclusiveLock);

	_bitmap_relbuf(metabuf);

	MIRROREDLOCK_BUFMGR_UNLOCK;
	/* -------- MirroredLock ---------- */

	/* One FmgrInfo / strictness flag per index attribute. */
	cur_bmbuild = (BMBuildHashData *) palloc(sizeof(BMBuildHashData));
	cur_bmbuild->hash_funcs = (FmgrInfo *)
		palloc(sizeof(FmgrInfo) * bmstate->bm_tupDesc->natts);
	cur_bmbuild->eq_funcs = (FmgrInfo *)
		palloc(sizeof(FmgrInfo) * bmstate->bm_tupDesc->natts);
	cur_bmbuild->hash_func_is_strict = (bool *)
		palloc(sizeof(bool) * bmstate->bm_tupDesc->natts);

	for (i = 0; i < bmstate->bm_tupDesc->natts; i++)
	{
		Oid			typid = bmstate->bm_tupDesc->attrs[i]->atttypid;
		Operator	optup;
		Oid			eq_opr;
		Oid			eq_function;
		Oid			left_hash_function;
		Oid			right_hash_function;

		optup = equality_oper(typid, false);
		eq_opr = oprid(optup);
		eq_function = oprfuncid(optup);
		ReleaseOperator(optup);

		if (!get_op_hash_functions(eq_opr,
								   &left_hash_function,
								   &right_hash_function))
		{
			/*
			 * This attribute cannot be hashed, so give up on the hash
			 * table.  Release the per-attribute arrays as well as the
			 * container; freeing only the container would leave them
			 * allocated until the memory context goes away.
			 */
			pfree(cur_bmbuild->hash_funcs);
			pfree(cur_bmbuild->eq_funcs);
			pfree(cur_bmbuild->hash_func_is_strict);
			pfree(cur_bmbuild);
			cur_bmbuild = NULL;
			break;
		}

		Assert(left_hash_function == right_hash_function);
		fmgr_info(eq_function, &cur_bmbuild->eq_funcs[i]);
		fmgr_info(right_hash_function, &cur_bmbuild->hash_funcs[i]);
		cur_bmbuild->hash_func_is_strict[i] = func_strict(right_hash_function);
	}

	if (cur_bmbuild)
	{
		/* Every attribute is hashable: build through a hash table. */
		cur_bmbuild->natts = bmstate->bm_tupDesc->natts;
		cur_bmbuild->tmpcxt =
			AllocSetContextCreate(CurrentMemoryContext,
								  "Bitmap build temp space",
								  ALLOCSET_DEFAULT_MINSIZE,
								  ALLOCSET_DEFAULT_INITSIZE,
								  ALLOCSET_DEFAULT_MAXSIZE);

		/* setup the hash table */
		MemSet(&hash_ctl, 0, sizeof(hash_ctl));

		/*
		 * Reserve enough space for the hash key header and then the data
		 * segments (values followed by nulls).
		 */
		hash_ctl.keysize = MAXALIGN(sizeof(BMBuildHashKey)) +
			MAXALIGN(sizeof(Datum) * cur_bmbuild->natts) +
			MAXALIGN(sizeof(bool) * cur_bmbuild->natts);

		/* NOTE(review): the reason for the extra 200 bytes is not evident here. */
		hash_ctl.entrysize = hash_ctl.keysize + sizeof(BMBuildLovData) + 200;
		hash_ctl.hash = build_hash_key;
		hash_ctl.match = build_match_key;
		hash_ctl.keycopy = build_keycopy;
		hash_ctl.hcxt = AllocSetContextCreate(CurrentMemoryContext,
											  "Bitmap build hash table",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
		cur_bmbuild->hash_cxt = hash_ctl.hcxt;

		hash_flags = HASH_ELEM | HASH_FUNCTION | HASH_COMPARE |
			HASH_CONTEXT | HASH_KEYCOPY;

		bmstate->lovitem_hash = hash_create("Bitmap index build lov item hash",
											100, &hash_ctl, hash_flags);
		bmstate->lovitem_hashKeySize = hash_ctl.keysize;
	}
	else
	{
		int			attno;

		/* No hash table: fall back to an index scan on the LOV index. */
		bmstate->lovitem_hash = NULL;
		bmstate->lovitem_hashKeySize = 0;

		bmstate->bm_lov_scanKeys =
			(ScanKey) palloc0(bmstate->bm_tupDesc->natts * sizeof(ScanKeyData));

		for (attno = 0; attno < bmstate->bm_tupDesc->natts; attno++)
		{
			RegProcedure opfuncid;
			Oid			atttypid;

			atttypid = bmstate->bm_tupDesc->attrs[attno]->atttypid;
			opfuncid = equality_oper_funcid(atttypid);

			/*
			 * Keys start out flagged SK_ISNULL with a zero argument;
			 * presumably the real comparison values are filled in per
			 * tuple later -- TODO confirm against the caller.
			 */
			ScanKeyEntryInitialize(&(bmstate->bm_lov_scanKeys[attno]),
								   SK_ISNULL,
								   attno + 1,
								   BTEqualStrategyNumber,
								   InvalidOid,
								   opfuncid,
								   0);
		}

		bmstate->bm_lov_scanDesc = index_beginscan(bmstate->bm_lov_heap,
												   bmstate->bm_lov_index,
												   ActiveSnapshot,
												   bmstate->bm_tupDesc->natts,
												   bmstate->bm_lov_scanKeys);
	}

	/*
	 * We need to log index creation in WAL iff WAL archiving is enabled
	 * AND it's not a temp index. Currently, since building an index
	 * writes page to the shared buffer, we can't disable WAL archiving.
	 * We will add this shortly.
	 *
	 * As coded: WAL is used unless XLog_UnconvertedCanBypassWal() returns
	 * true or the index is temporary.
	 */
	bmstate->use_wal = !XLog_UnconvertedCanBypassWal() && !index->rd_istemp;
}
/*
 * index-info--
 *	  Retrieves catalog information on an index on a given relation.
 *
 *	  The index catalog is opened and a scan started on the first
 *	  invocation ('first' nonzero).  Each call then fetches the next index
 *	  of the relation that has not already been returned by a previous
 *	  call.  The scan and the catalog are closed when no more indices for
 *	  'relid' can be found.
 *
 *	  The scan state lives in function-level static variables, so only one
 *	  enumeration can be in progress at a time.
 *
 * root:  query whose range table is used to map 'relid' to a relation OID
 * first: nonzero on the first call of an enumeration
 * relid: range-table index of the indexed relation
 * info:  transient result structure; it is zeroed and its indexkeys,
 *		  orderOprs and classlist arrays are freshly palloc'd on every
 *		  call, including the call that returns false
 *
 * Returns true if an index was found and false otherwise.
 */
bool
index_info(Query *root, bool first, int relid, IdxInfoRetval *info)
{
	int			i;				/* was "register i": implicit int is not valid C99 */
	HeapTuple	indexTuple,
				amopTuple;
	IndexTupleForm index;
	Relation	indexRelation;
	uint16		amstrategy;
	Oid			relam;
	Oid			indrelid;

	static Relation relation = (Relation) NULL;
	static HeapScanDesc scan = (HeapScanDesc) NULL;
	static ScanKeyData indexKey;

	/* find the oid of the indexed relation */
	indrelid = getrelid(relid, root->rtable);

	memset(info, 0, sizeof(IdxInfoRetval));

	/*
	 * the maximum number of elements in each of the following arrays is
	 * 8. We allocate one more for a terminating 0 to indicate the end
	 * of the array.
	 */
	info->indexkeys = (int *) palloc(sizeof(int) * 9);
	memset(info->indexkeys, 0, sizeof(int) * 9);
	info->orderOprs = (Oid *) palloc(sizeof(Oid) * 9);
	memset(info->orderOprs, 0, sizeof(Oid) * 9);
	info->classlist = (Oid *) palloc(sizeof(Oid) * 9);
	memset(info->classlist, 0, sizeof(Oid) * 9);

	/* Find an index on the given relation */
	if (first)
	{
		/* Drop whatever a previous, unfinished enumeration left behind. */
		if (RelationIsValid(relation))
			heap_close(relation);
		if (HeapScanIsValid(scan))
			heap_endscan(scan);

		ScanKeyEntryInitialize(&indexKey, 0,
							   Anum_pg_index_indrelid,
							   F_OIDEQ,
							   ObjectIdGetDatum(indrelid));

		relation = heap_openr(IndexRelationName);
		scan = heap_beginscan(relation, 0, NowTimeQual,
							  1, &indexKey);
	}
	if (!HeapScanIsValid(scan))
		elog(WARN, "index_info: scan not started");

	indexTuple = heap_getnext(scan, 0, (Buffer *) NULL);
	if (!HeapTupleIsValid(indexTuple))
	{
		/* Enumeration finished: release the scan and the catalog. */
		heap_endscan(scan);
		heap_close(relation);
		scan = (HeapScanDesc) NULL;
		relation = (Relation) NULL;
		return (0);
	}

	/* Extract info from the index tuple */
	index = (IndexTupleForm) GETSTRUCT(indexTuple);
	info->relid = index->indexrelid;	/* index relation */
	for (i = 0; i < 8; i++)
		info->indexkeys[i] = index->indkey[i];
	for (i = 0; i < 8; i++)
		info->classlist[i] = index->indclass[i];

	info->indproc = index->indproc;		/* functional index ?? */

	/* partial index ?? */
	if (VARSIZE(&index->indpred) != 0)
	{
		/*
		 * The memory allocated here for the predicate (in lispReadString)
		 * only needs to stay around until it's used in find_index_paths,
		 * which is all within a command, so the automatic pfree at end
		 * of transaction should be ok.
		 */
		char	   *predString;

		predString = fmgr(F_TEXTOUT, &index->indpred);
		info->indpred = (Node *) stringToNode(predString);
		pfree(predString);
	}

	/* Extract info from the relation descriptor for the index */
	indexRelation = index_open(index->indexrelid);
#ifdef notdef
	/* XXX should iterate through strategies -- but how?  use #1 for now */
	amstrategy = indexRelation->rd_am->amstrategies;
#endif   /* notdef */
	amstrategy = 1;
	relam = indexRelation->rd_rel->relam;
	info->relam = relam;
	info->pages = indexRelation->rd_rel->relpages;
	info->tuples = indexRelation->rd_rel->reltuples;
	heap_close(indexRelation);

	/*
	 * Find the index ordering keys
	 *
	 * Must use indclass to know when to stop looking since with
	 * functional indices there could be several keys (args) for
	 * one opclass. -mer 27 Sept 1991
	 */
	for (i = 0; i < 8 && index->indclass[i]; ++i)
	{
		amopTuple = SearchSysCacheTuple(AMOPSTRATEGY,
										ObjectIdGetDatum(relam),
										ObjectIdGetDatum(index->indclass[i]),
										UInt16GetDatum(amstrategy),
										0);
		if (!HeapTupleIsValid(amopTuple))
			/* relam and indclass[] are Oids, so print them unsigned */
			elog(WARN, "index_info: no amop %u %u %d",
				 relam, index->indclass[i], amstrategy);
		info->orderOprs[i] =
			((Form_pg_amop) GETSTRUCT(amopTuple))->amopopr;
	}
	return (TRUE);
}
/*
 * ExecIndexBuildScanKeys
 *		Build the index scan keys from the index qualification expressions
 *
 * The index quals are passed to the index AM in the form of a ScanKey array.
 * This routine sets up the ScanKeys, fills in all constant fields of the
 * ScanKeys, and prepares information about the keys that have non-constant
 * comparison values.  We divide index qual expressions into five types:
 *
 * 1. Simple operator with constant comparison value ("indexkey op constant").
 * For these, we just fill in a ScanKey containing the constant value.
 *
 * 2. Simple operator with non-constant value ("indexkey op expression").
 * For these, we create a ScanKey with everything filled in except the
 * expression value, and set up an IndexRuntimeKeyInfo struct to drive
 * evaluation of the expression at the right times.
 *
 * 3. RowCompareExpr ("(indexkey, indexkey, ...) op (expr, expr, ...)").
 * For these, we create a header ScanKey plus a subsidiary ScanKey array,
 * as specified in access/skey.h.  The elements of the row comparison
 * can have either constant or non-constant comparison values.
 *
 * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)").  For these,
 * we create a ScanKey with everything filled in except the comparison value,
 * and set up an IndexArrayKeyInfo struct to drive processing of the qual.
 * (Note that we treat all array-expressions as requiring runtime evaluation,
 * even if they happen to be constants.)
 *
 * 5. NullTest ("indexkey IS NULL").  We just fill in the ScanKey properly.
 *
 * Layout of the working array: scan_keys is a single palloc'd chunk.  Its
 * first list_length(quals) entries hold exactly one key per qual (for a
 * RowCompareExpr that entry is the header); the entries after those hold
 * the subsidiary keys of all RowCompareExprs, in qual order.
 *
 * Input params are:
 *
 * planstate: executor state node we are working for
 * index: the index we are building scan keys for
 * scanrelid: varno of the index's relation within current query
 * quals: indexquals expressions
 *
 * Output params are:
 *
 * *scanKeys: receives ptr to array of ScanKeys
 * *numScanKeys: receives number of scankeys
 * *runtimeKeys: receives ptr to array of IndexRuntimeKeyInfos, or NULL if none
 * *numRuntimeKeys: receives number of runtime keys
 * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none
 * *numArrayKeys: receives number of array keys
 *
 * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that
 * ScalarArrayOpExpr quals are not supported.
 */
void
ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
					   List *quals, ScanKey *scanKeys, int *numScanKeys,
					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
{
	ListCell   *qual_cell;
	ScanKey		scan_keys;
	IndexRuntimeKeyInfo *runtime_keys;
	IndexArrayKeyInfo *array_keys;
	int			n_scan_keys;
	int			extra_scan_keys;
	int			n_runtime_keys;
	int			n_array_keys;
	int			j;

	/*
	 * If there are any RowCompareExpr quals, we need extra ScanKey entries
	 * for them, and possibly extra runtime-key entries.  Count up what's
	 * needed.  (The subsidiary ScanKey arrays for the RowCompareExprs could
	 * be allocated as separate chunks, but we have to count anyway to make
	 * runtime_keys large enough, so might as well just do one palloc.)
	 */
	n_scan_keys = list_length(quals);
	extra_scan_keys = 0;
	foreach(qual_cell, quals)
	{
		if (IsA(lfirst(qual_cell), RowCompareExpr))
			extra_scan_keys +=
				list_length(((RowCompareExpr *) lfirst(qual_cell))->opnos);
	}
	scan_keys = (ScanKey)
		palloc((n_scan_keys + extra_scan_keys) * sizeof(ScanKeyData));
	/* Allocate these arrays as large as they could possibly need to be */
	runtime_keys = (IndexRuntimeKeyInfo *)
		palloc((n_scan_keys + extra_scan_keys) * sizeof(IndexRuntimeKeyInfo));
	array_keys = (IndexArrayKeyInfo *)
		palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
	n_runtime_keys = 0;
	n_array_keys = 0;

	/*
	 * Below here, extra_scan_keys is index of first cell to use for next
	 * RowCompareExpr
	 */
	extra_scan_keys = n_scan_keys;

	/*
	 * for each opclause in the given qual, convert the opclause into a single
	 * scan key
	 */
	j = 0;
	foreach(qual_cell, quals)
	{
		Expr	   *clause = (Expr *) lfirst(qual_cell);
		/* every qual, whatever its type, consumes one primary slot */
		ScanKey		this_scan_key = &scan_keys[j++];
		Oid			opno;		/* operator's OID */
		RegProcedure opfuncid;	/* operator proc id used in scan */
		Oid			opfamily;	/* opfamily of index column */
		int			op_strategy;	/* operator's strategy number */
		Oid			op_lefttype;	/* operator's declared input types */
		Oid			op_righttype;
		Expr	   *leftop;		/* expr on lhs of operator */
		Expr	   *rightop;	/* expr on rhs of operator */
		AttrNumber	varattno;	/* att number used in scan */

		if (IsA(clause, OpExpr))
		{
			/* indexkey op const or indexkey op expression */
			int			flags = 0;
			Datum		scanvalue;

			opno = ((OpExpr *) clause)->opno;
			opfuncid = ((OpExpr *) clause)->opfuncid;

			/*
			 * leftop should be the index key Var, possibly relabeled
			 */
			leftop = (Expr *) get_leftop(clause);

			if (leftop && IsA(leftop, RelabelType))
				leftop = ((RelabelType *) leftop)->arg;

			Assert(leftop != NULL);

			if (!(IsA(leftop, Var) &&
				  ((Var *) leftop)->varno == scanrelid))
				elog(ERROR, "indexqual doesn't have key on left side");

			varattno = ((Var *) leftop)->varattno;
			/* must be checked before varattno is used to index rd_opfamily */
			if (varattno < 1 || varattno > index->rd_index->indnatts)
				elog(ERROR, "bogus index qualification");

			/*
			 * We have to look up the operator's strategy number.  This
			 * provides a cross-check that the operator does match the index.
			 */
			opfamily = index->rd_opfamily[varattno - 1];

			get_op_opfamily_properties(opno, opfamily,
									   &op_strategy,
									   &op_lefttype,
									   &op_righttype);

			/*
			 * rightop is the constant or variable comparison value
			 */
			rightop = (Expr *) get_rightop(clause);

			if (rightop && IsA(rightop, RelabelType))
				rightop = ((RelabelType *) rightop)->arg;

			Assert(rightop != NULL);

			if (IsA(rightop, Const))
			{
				/* OK, simple constant comparison value */
				scanvalue = ((Const *) rightop)->constvalue;
				if (((Const *) rightop)->constisnull)
					flags |= SK_ISNULL;
			}
			else
			{
				/*
				 * Need to treat this one as a runtime key: remember which
				 * scan key the expression belongs to and leave a zero
				 * placeholder as the argument for now.
				 */
				runtime_keys[n_runtime_keys].scan_key = this_scan_key;
				runtime_keys[n_runtime_keys].key_expr =
					ExecInitExpr(rightop, planstate);
				n_runtime_keys++;
				scanvalue = (Datum) 0;
			}

			/*
			 * initialize the scan key's fields appropriately
			 */
			ScanKeyEntryInitialize(this_scan_key,
								   flags,
								   varattno,	/* attribute number to scan */
								   op_strategy, /* op's strategy */
								   op_righttype,		/* strategy subtype */
								   opfuncid,	/* reg proc to use */
								   scanvalue);	/* constant */
		}
		else if (IsA(clause, RowCompareExpr))
		{
			/* (indexkey, indexkey, ...) op (expression, expression, ...) */
			RowCompareExpr *rc = (RowCompareExpr *) clause;
			ListCell   *largs_cell = list_head(rc->largs);
			ListCell   *rargs_cell = list_head(rc->rargs);
			ListCell   *opnos_cell = list_head(rc->opnos);
			/* where this row's subsidiary keys start; the header points here */
			ScanKey		first_sub_key = &scan_keys[extra_scan_keys];

			/*
			 * Scan RowCompare columns and generate subsidiary ScanKey items.
			 * The loop is driven by opnos; largs and rargs are advanced in
			 * step without their own end checks, so the three lists are
			 * assumed to have equal length.
			 */
			while (opnos_cell != NULL)
			{
				ScanKey		this_sub_key = &scan_keys[extra_scan_keys];
				int			flags = SK_ROW_MEMBER;
				Datum		scanvalue;

				/*
				 * leftop should be the index key Var, possibly relabeled
				 */
				leftop = (Expr *) lfirst(largs_cell);
				largs_cell = lnext(largs_cell);

				if (leftop && IsA(leftop, RelabelType))
					leftop = ((RelabelType *) leftop)->arg;

				Assert(leftop != NULL);

				if (!(IsA(leftop, Var) &&
					  ((Var *) leftop)->varno == scanrelid))
					elog(ERROR, "indexqual doesn't have key on left side");

				varattno = ((Var *) leftop)->varattno;

				/*
				 * rightop is the constant or variable comparison value
				 */
				rightop = (Expr *) lfirst(rargs_cell);
				rargs_cell = lnext(rargs_cell);

				if (rightop && IsA(rightop, RelabelType))
					rightop = ((RelabelType *) rightop)->arg;

				Assert(rightop != NULL);

				if (IsA(rightop, Const))
				{
					/* OK, simple constant comparison value */
					scanvalue = ((Const *) rightop)->constvalue;
					if (((Const *) rightop)->constisnull)
						flags |= SK_ISNULL;
				}
				else
				{
					/* Need to treat this one as a runtime key */
					runtime_keys[n_runtime_keys].scan_key = this_sub_key;
					runtime_keys[n_runtime_keys].key_expr =
						ExecInitExpr(rightop, planstate);
					n_runtime_keys++;
					scanvalue = (Datum) 0;
				}

				/*
				 * We have to look up the operator's associated btree support
				 * function
				 */
				opno = lfirst_oid(opnos_cell);
				opnos_cell = lnext(opnos_cell);

				/* row comparisons are accepted for btree indexes only */
				if (index->rd_rel->relam != BTREE_AM_OID ||
					varattno < 1 || varattno > index->rd_index->indnatts)
					elog(ERROR, "bogus RowCompare index qualification");
				opfamily = index->rd_opfamily[varattno - 1];

				get_op_opfamily_properties(opno, opfamily,
										   &op_strategy,
										   &op_lefttype,
										   &op_righttype);

				/* every column's operator must agree with the row operator */
				if (op_strategy != rc->rctype)
					elog(ERROR, "RowCompare index qualification contains wrong operator");

				/*
				 * NOTE(review): the result is used without a validity
				 * check -- confirm that a missing BTORDER_PROC support
				 * function cannot occur here.
				 */
				opfuncid = get_opfamily_proc(opfamily,
											 op_lefttype,
											 op_righttype,
											 BTORDER_PROC);

				/*
				 * initialize the subsidiary scan key's fields appropriately
				 */
				ScanKeyEntryInitialize(this_sub_key,
									   flags,
									   varattno,		/* attribute number */
									   op_strategy,		/* op's strategy */
									   op_righttype,	/* strategy subtype */
									   opfuncid,		/* reg proc to use */
									   scanvalue);		/* constant */
				extra_scan_keys++;
			}

			/* Mark the last subsidiary scankey correctly */
			scan_keys[extra_scan_keys - 1].sk_flags |= SK_ROW_END;

			/*
			 * We don't use ScanKeyEntryInitialize for the header because it
			 * isn't going to contain a valid sk_func pointer.
			 */
			MemSet(this_scan_key, 0, sizeof(ScanKeyData));
			this_scan_key->sk_flags = SK_ROW_HEADER;
			this_scan_key->sk_attno = first_sub_key->sk_attno;
			this_scan_key->sk_strategy = rc->rctype;
			/* sk_subtype, sk_func not used in a header */
			this_scan_key->sk_argument = PointerGetDatum(first_sub_key);
		}
		else if (IsA(clause, ScalarArrayOpExpr))
		{
			/* indexkey op ANY (array-expression) */
			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;

			/* only the ANY (OR) form is expected here */
			Assert(saop->useOr);
			opno = saop->opno;
			opfuncid = saop->opfuncid;

			/*
			 * leftop should be the index key Var, possibly relabeled
			 */
			leftop = (Expr *) linitial(saop->args);

			if (leftop && IsA(leftop, RelabelType))
				leftop = ((RelabelType *) leftop)->arg;

			Assert(leftop != NULL);

			if (!(IsA(leftop, Var) &&
				  ((Var *) leftop)->varno == scanrelid))
				elog(ERROR, "indexqual doesn't have key on left side");

			varattno = ((Var *) leftop)->varattno;
			if (varattno < 1 || varattno > index->rd_index->indnatts)
				elog(ERROR, "bogus index qualification");

			/*
			 * We have to look up the operator's strategy number.  This
			 * provides a cross-check that the operator does match the index.
			 */
			opfamily = index->rd_opfamily[varattno - 1];

			get_op_opfamily_properties(opno, opfamily,
									   &op_strategy,
									   &op_lefttype,
									   &op_righttype);

			/*
			 * rightop is the constant or variable array value
			 */
			rightop = (Expr *) lsecond(saop->args);

			if (rightop && IsA(rightop, RelabelType))
				rightop = ((RelabelType *) rightop)->arg;

			Assert(rightop != NULL);

			/*
			 * NOTE(review): array_keys is filled in unconditionally; the
			 * header says callers may pass NULL for arrayKeys when array
			 * quals are unsupported, and no such check is visible in this
			 * part of the function -- confirm it is handled elsewhere.
			 */
			array_keys[n_array_keys].scan_key = this_scan_key;
			array_keys[n_array_keys].array_expr =
				ExecInitExpr(rightop, planstate);
			/* the remaining fields were zeroed by palloc0 */
			n_array_keys++;

			/*
			 * initialize the scan key's fields appropriately
			 */
			ScanKeyEntryInitialize(this_scan_key,
								   0,	/* flags */
								   varattno,	/* attribute number to scan */
								   op_strategy, /* op's strategy */
								   op_righttype,		/* strategy subtype */
								   opfuncid,	/* reg proc to use */
								   (Datum) 0);	/* constant */
		}
		else if (IsA(clause, NullTest))
		{
			/* indexkey IS NULL */
			Assert(((NullTest *) clause)->nulltesttype == IS_NULL);

			/*
			 * argument should be the index key Var, possibly relabeled
			 */
			leftop = ((NullTest *) clause)->arg;

			if (leftop && IsA(leftop, RelabelType))
				leftop = ((RelabelType *) leftop)->arg;

			Assert(leftop != NULL);

			if (!(IsA(leftop, Var) &&
				  ((Var *) leftop)->varno == scanrelid))
				elog(ERROR, "NullTest indexqual has wrong key");

			/*
			 * NOTE(review): unlike the OpExpr and ScalarArrayOpExpr
			 * branches, varattno is not range-checked against indnatts
			 * here -- confirm whether that is intentional.
			 */
			varattno = ((Var *) leftop)->varattno;

			/*
			 * initialize the scan key's fields appropriately
			 */
			ScanKeyEntryInitialize(this_scan_key,
								   SK_ISNULL | SK_SEARCHNULL,
								   varattno,	/* attribute number to scan */
								   InvalidStrategy,		/* no strategy */
								   InvalidOid,	/* no strategy subtype */
								   InvalidOid,	/* no reg proc for this */
								   (Datum) 0);	/* constant */
		}
		else
			elog(ERROR, "unsupported indexqual type: %d",
				 (int) nodeTag(clause));
	}
	/*
	 * NOTE(review): the visible text of this function stops here, after the
	 * qual loop; the assignments to the output parameters and the closing
	 * brace are not present -- confirm against the complete file.
	 */
/* -------------------------------- * ReverifyMyDatabase * * Since we are forced to fetch the database OID out of pg_database without * benefit of locking or transaction ID checking (see utils/misc/database.c), * we might have gotten a wrong answer. Or, we might have attached to a * database that's in process of being destroyed by destroydb(). This * routine is called after we have all the locking and other infrastructure * running --- now we can check that we are really attached to a valid * database. * * In reality, if destroydb() is running in parallel with our startup, * it's pretty likely that we will have failed before now, due to being * unable to read some of the system tables within the doomed database. * This routine just exists to make *sure* we have not started up in an * invalid database. If we quit now, we should have managed to avoid * creating any serious problems. * * This is also a handy place to fetch the database encoding info out * of pg_database. * * To avoid having to read pg_database more times than necessary * during session startup, this place is also fitting to set up any * database-specific configuration variables. * -------------------------------- */ static void ReverifyMyDatabase(const char *name) { Relation pgdbrel; HeapScanDesc pgdbscan; ScanKeyData key; HeapTuple tup; Form_pg_database dbform; /* * Because we grab AccessShareLock here, we can be sure that destroydb * is not running in parallel with us (any more). 
*/ pgdbrel = heap_openr(DatabaseRelationName, AccessShareLock); ScanKeyEntryInitialize(&key, 0, Anum_pg_database_datname, F_NAMEEQ, NameGetDatum(name)); pgdbscan = heap_beginscan(pgdbrel, SnapshotNow, 1, &key); tup = heap_getnext(pgdbscan, ForwardScanDirection); if (!HeapTupleIsValid(tup) || HeapTupleGetOid(tup) != MyDatabaseId) { /* OOPS */ heap_close(pgdbrel, AccessShareLock); /* * The only real problem I could have created is to load dirty * buffers for the dead database into shared buffer cache; if I * did, some other backend will eventually try to write them and * die in mdblindwrt. Flush any such pages to forestall trouble. */ DropBuffers(MyDatabaseId); /* Now I can commit hara-kiri with a clear conscience... */ ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\", OID %u, has disappeared from pg_database", name, MyDatabaseId))); } /* * Also check that the database is currently allowing connections. * (We do not enforce this in standalone mode, however, so that there is * a way to recover from "UPDATE pg_database SET datallowconn = false;") */ dbform = (Form_pg_database) GETSTRUCT(tup); if (IsUnderPostmaster && !dbform->datallowconn) ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("database \"%s\" is not currently accepting connections", name))); /* * OK, we're golden. Only other to-do item is to save the encoding * info out of the pg_database tuple. */ SetDatabaseEncoding(dbform->encoding); /* Record it as a GUC internal option, too */ SetConfigOption("server_encoding", GetDatabaseEncodingName(), PGC_INTERNAL, PGC_S_OVERRIDE); /* If we have no other source of client_encoding, use server encoding */ SetConfigOption("client_encoding", GetDatabaseEncodingName(), PGC_BACKEND, PGC_S_DEFAULT); /* * Set up database-specific configuration variables. 
*/ if (IsUnderPostmaster) { Datum datum; bool isnull; datum = heap_getattr(tup, Anum_pg_database_datconfig, RelationGetDescr(pgdbrel), &isnull); if (!isnull) { ArrayType *a = DatumGetArrayTypeP(datum); ProcessGUCArray(a, PGC_S_DATABASE); } } heap_endscan(pgdbscan); heap_close(pgdbrel, AccessShareLock); }
/* ----------------
 *		setRelhasindex
 *
 * Update relhasindex in the pg_class row of relation 'relid'.
 *
 * relid:         relation whose pg_class row is updated
 * hasindex:      new value for relhasindex
 * isprimary:     when true a primary index is being defined, so relhaspkey
 *                is set to true as well; when false relhaspkey is untouched
 * reltoastidxid: when valid, stored into reltoastidxid (TOAST relations
 *                only); when InvalidOid the field is left alone
 *
 * NOTE: an important side-effect is that an SI invalidation message goes
 * out to all backends, this one included, so relcache entries get flushed
 * or rebuilt with the new hasindex data.  That has to happen even when the
 * pg_class row turns out not to need any change.
 * ----------------
 */
void
setRelhasindex(Oid relid, bool hasindex, bool isprimary, Oid reltoastidxid)
{
	Relation	classRel;
	HeapTuple	classTup;
	Form_pg_class classForm;
	HeapScanDesc bootScan = NULL;	/* non-NULL only in bootstrap mode */
	bool		changed = false;

	classRel = heap_openr(RelationRelationName, RowExclusiveLock);

	/*
	 * Locate the pg_class row.  Bootstrap mode cannot use heap_update, so
	 * there we scan for the tuple and overwrite it in place; otherwise we
	 * work on a private copy fetched through the syscache.
	 */
	if (IsBootstrapProcessingMode())
	{
		ScanKeyData oidKey[1];

		ScanKeyEntryInitialize(&oidKey[0], 0,
							   ObjectIdAttributeNumber,
							   F_OIDEQ,
							   ObjectIdGetDatum(relid));

		bootScan = heap_beginscan(classRel, SnapshotNow, 1, oidKey);
		classTup = heap_getnext(bootScan, ForwardScanDirection);
	}
	else
	{
		classTup = SearchSysCacheCopy(RELOID,
									  ObjectIdGetDatum(relid),
									  0, 0, 0);
	}

	if (!HeapTupleIsValid(classTup))
		elog(ERROR, "could not find tuple for relation %u", relid);
	classForm = (Form_pg_class) GETSTRUCT(classTup);

	/* In-place modification requires the buffer to be locked exclusively. */
	if (bootScan != NULL)
		LockBuffer(bootScan->rs_cbuf, BUFFER_LOCK_EXCLUSIVE);

	/* Apply the requested field updates, noting whether anything changed. */
	if (classForm->relhasindex != hasindex)
	{
		classForm->relhasindex = hasindex;
		changed = true;
	}

	if (isprimary && !classForm->relhaspkey)
	{
		classForm->relhaspkey = true;
		changed = true;
	}

	if (OidIsValid(reltoastidxid))
	{
		Assert(classForm->relkind == RELKIND_TOASTVALUE);
		if (classForm->reltoastidxid != reltoastidxid)
		{
			classForm->reltoastidxid = reltoastidxid;
			changed = true;
		}
	}

	if (bootScan != NULL)
	{
		/* Bootstrap path: the tuple was edited directly in its buffer. */
		LockBuffer(bootScan->rs_cbuf, BUFFER_LOCK_UNLOCK);

		/* Write the modified tuple in-place */
		WriteNoReleaseBuffer(bootScan->rs_cbuf);

		/* Send out shared cache inval if necessary */
		if (!IsBootstrapProcessingMode())
			CacheInvalidateHeapTuple(classRel, classTup);
		BufferSync();

		heap_endscan(bootScan);
	}
	else
	{
		if (changed)
		{
			simple_heap_update(classRel, &classTup->t_self, classTup);

			/* Keep the catalog indexes up to date */
			CatalogUpdateIndexes(classRel, classTup);
		}
		else
		{
			/* row is already right, but the relcache must be rebuilt anyway */
			CacheInvalidateRelcache(relid);
		}

		/* the syscache handed us a copy, so it is ours to free */
		heap_freetuple(classTup);
	}

	heap_close(classRel, RowExclusiveLock);
}
/* * regprocin - converts "proname" to proc OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_proc entry. */ Datum regprocin(PG_FUNCTION_ARGS) { char *pro_name_or_oid = PG_GETARG_CSTRING(0); RegProcedure result = InvalidOid; List *names; FuncCandidateList clist; /* '-' ? */ if (strcmp(pro_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (pro_name_or_oid[0] >= '0' && pro_name_or_oid[0] <= '9' && strspn(pro_name_or_oid, "0123456789") == strlen(pro_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(pro_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, * and just search pg_proc for a unique match. This is needed for * initializing other system catalogs (pg_namespace may not exist yet, * and certainly there are no schemas other than pg_catalog). 
*/ if (IsBootstrapProcessingMode()) { int matches = 0; Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyEntryInitialize(&skey[0], 0x0, (AttrNumber) Anum_pg_proc_proname, (RegProcedure) F_NAMEEQ, CStringGetDatum(pro_name_or_oid)); hdesc = heap_openr(ProcedureRelationName, AccessShareLock); sysscan = systable_beginscan(hdesc, ProcedureNameNspIndex, true, SnapshotNow, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(sysscan))) { result = (RegProcedure) HeapTupleGetOid(tuple); if (++matches > 1) break; } systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); if (matches == 0) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (matches > 1) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches * any pg_proc entries in the current search path. */ names = stringToQualifiedNameList(pro_name_or_oid, "regprocin"); clist = FuncnameGetCandidates(names, -1); if (clist == NULL) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (clist->next != NULL) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); result = clist->oid; PG_RETURN_OID(result); }
Datum rtrescan(PG_FUNCTION_ARGS) { IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey key = (ScanKey) PG_GETARG_POINTER(1); RTreeScanOpaque p; RegProcedure internal_proc; int i; /* * Clear all the pointers. */ ItemPointerSetInvalid(&s->currentItemData); ItemPointerSetInvalid(&s->currentMarkData); p = (RTreeScanOpaque) s->opaque; if (p != (RTreeScanOpaque) NULL) { /* rescan an existing indexscan --- reset state */ freestack(p->s_stack); freestack(p->s_markstk); p->s_stack = p->s_markstk = (RTSTACK *) NULL; p->s_flags = 0x0; } else { /* initialize opaque data */ p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData)); p->s_stack = p->s_markstk = (RTSTACK *) NULL; p->s_internalNKey = s->numberOfKeys; p->s_flags = 0x0; s->opaque = p; if (s->numberOfKeys > 0) p->s_internalKey = (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys); } /* Update scan key, if a new one is given */ if (key && s->numberOfKeys > 0) { memmove(s->keyData, key, s->numberOfKeys * sizeof(ScanKeyData)); /* * Scans on internal pages use different operators than they do on * leaf pages. For example, if the user wants all boxes that * exactly match (x1,y1,x2,y2), then on internal pages we need to * find all boxes that contain (x1,y1,x2,y2). */ for (i = 0; i < s->numberOfKeys; i++) { internal_proc = RTMapOperator(s->indexRelation, s->keyData[i].sk_attno, s->keyData[i].sk_procedure); ScanKeyEntryInitialize(&(p->s_internalKey[i]), s->keyData[i].sk_flags, s->keyData[i].sk_attno, internal_proc, s->keyData[i].sk_argument); } } PG_RETURN_VOID(); }
/* * regclassin - converts "classname" to class OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_class entry. */ Datum regclassin(PG_FUNCTION_ARGS) { char *class_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; List *names; /* '-' ? */ if (strcmp(class_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (class_name_or_oid[0] >= '0' && class_name_or_oid[0] <= '9' && strspn(class_name_or_oid, "0123456789") == strlen(class_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(class_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, * and just search pg_class for a match. This is needed for * initializing other system catalogs (pg_namespace may not exist yet, * and certainly there are no schemas other than pg_catalog). */ if (IsBootstrapProcessingMode()) { Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyEntryInitialize(&skey[0], 0x0, (AttrNumber) Anum_pg_class_relname, (RegProcedure) F_NAMEEQ, CStringGetDatum(class_name_or_oid)); hdesc = heap_openr(RelationRelationName, AccessShareLock); sysscan = systable_beginscan(hdesc, ClassNameNspIndex, true, SnapshotNow, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); else ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("relation \"%s\" does not exist", class_name_or_oid))); /* We assume there can be only one match */ systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches * any pg_class entries in the current search path. 
*/ names = stringToQualifiedNameList(class_name_or_oid, "regclassin"); result = RangeVarGetRelid(makeRangeVarFromNameList(names), false); PG_RETURN_OID(result); }
/*
 * ExecIndexBuildScanKeys
 *		Build the index scan keys from the index qualification expressions
 *
 * The index quals are passed to the index AM in the form of a ScanKey array.
 * This routine sets up the ScanKeys, fills in all constant fields of the
 * ScanKeys, and prepares information about the keys that have non-constant
 * comparison values.  We divide index qual expressions into five types:
 *
 * 1. Simple operator with constant comparison value ("indexkey op constant").
 * For these, we just fill in a ScanKey containing the constant value.
 *
 * 2. Simple operator with non-constant value ("indexkey op expression").
 * For these, we create a ScanKey with everything filled in except the
 * expression value, and set up an IndexRuntimeKeyInfo struct to drive
 * evaluation of the expression at the right times.
 *
 * 3. RowCompareExpr ("(indexkey, indexkey, ...) op (expr, expr, ...)").
 * For these, we create a header ScanKey plus a subsidiary ScanKey array,
 * as specified in access/skey.h.  The elements of the row comparison
 * can have either constant or non-constant comparison values.
 *
 * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)").  For these,
 * we create a ScanKey with everything filled in except the comparison value,
 * and set up an IndexArrayKeyInfo struct to drive processing of the qual.
 * (Note that we treat all array-expressions as requiring runtime evaluation,
 * even if they happen to be constants.)
 *
 * 5. NullTest ("indexkey IS NULL/IS NOT NULL").  We just fill in the
 * ScanKey properly.
 *
 * This code is also used to prepare ORDER BY expressions for amcanorderbyop
 * indexes.  The behavior is exactly the same, except that we have to look up
 * the operator differently.  Note that only cases 1 and 2 are currently
 * possible for ORDER BY.
 *
 * Input params are:
 *
 * planstate: executor state node we are working for
 * index: the index we are building scan keys for
 * scanrelid: varno of the index's relation within current query
 * quals: indexquals (or indexorderbys) expressions
 * isorderby: true if processing ORDER BY exprs, false if processing quals
 * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
 * *numRuntimeKeys: number of pre-existing runtime keys
 *
 * Output params are:
 *
 * *scanKeys: receives ptr to array of ScanKeys
 * *numScanKeys: receives number of scankeys
 * *runtimeKeys: receives ptr to array of IndexRuntimeKeyInfos, or NULL if none
 * *numRuntimeKeys: receives number of runtime keys
 * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none
 * *numArrayKeys: receives number of array keys
 *
 * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that
 * ScalarArrayOpExpr quals are not supported.
 *
 * Any qual that does not have the expected shape (index key Var of
 * scanrelid on the left, attribute number within the index's columns,
 * recognized node type) is reported with elog(ERROR).
 */
void
ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
					   List *quals, bool isorderby,
					   ScanKey *scanKeys, int *numScanKeys,
					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
{
	ListCell   *qual_cell;
	ScanKey		scan_keys;
	IndexRuntimeKeyInfo *runtime_keys;
	IndexArrayKeyInfo *array_keys;
	int			n_scan_keys;
	int			n_runtime_keys;
	int			max_runtime_keys;
	int			n_array_keys;
	int			j;

	/* Allocate array for ScanKey structs: one per qual */
	n_scan_keys = list_length(quals);
	scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData));

	/*
	 * runtime_keys array is dynamically resized as needed.  We handle it this
	 * way so that the same runtime keys array can be shared between
	 * indexquals and indexorderbys, which will be processed in separate calls
	 * of this function.  Caller must be sure to pass in NULL/0 for first
	 * call.
	 *
	 * The incoming count is used as both the fill level and the capacity, so
	 * the first runtime key added in this call always triggers a resize.
	 */
	runtime_keys = *runtimeKeys;
	n_runtime_keys = max_runtime_keys = *numRuntimeKeys;

	/* Allocate array_keys as large as it could possibly need to be */
	array_keys = (IndexArrayKeyInfo *)
		palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
	n_array_keys = 0;

	/*
	 * for each opclause in the given qual, convert the opclause into a single
	 * scan key
	 */
	j = 0;
	foreach(qual_cell, quals)
	{
		Expr	   *clause = (Expr *) lfirst(qual_cell);
		/* each qual takes the next free slot of scan_keys, in list order */
		ScanKey		this_scan_key = &scan_keys[j++];
		Oid			opno;		/* operator's OID */
		RegProcedure opfuncid;	/* operator proc id used in scan */
		Oid			opfamily;	/* opfamily of index column */
		int			op_strategy;	/* operator's strategy number */
		Oid			op_lefttype;	/* operator's declared input types */
		Oid			op_righttype;
		Expr	   *leftop;		/* expr on lhs of operator */
		Expr	   *rightop;	/* expr on rhs ... */
		AttrNumber	varattno;	/* att number used in scan */

		if (IsA(clause, OpExpr))
		{
			/* indexkey op const or indexkey op expression */
			int			flags = 0;
			Datum		scanvalue;

			opno = ((OpExpr *) clause)->opno;
			opfuncid = ((OpExpr *) clause)->opfuncid;

			/*
			 * leftop should be the index key Var, possibly relabeled
			 */
			leftop = (Expr *) get_leftop(clause);

			if (leftop && IsA(leftop, RelabelType))
				leftop = ((RelabelType *) leftop)->arg;

			Assert(leftop != NULL);

			if (!(IsA(leftop, Var) &&
				  ((Var *) leftop)->varno == scanrelid))
				elog(ERROR, "indexqual doesn't have key on left side");

			varattno = ((Var *) leftop)->varattno;
			if (varattno < 1 || varattno > index->rd_index->indnatts)
				elog(ERROR, "bogus index qualification");

			/*
			 * We have to look up the operator's strategy number.  This
			 * provides a cross-check that the operator does match the index.
			 */
			opfamily = index->rd_opfamily[varattno - 1];

			get_op_opfamily_properties(opno, opfamily, isorderby,
									   &op_strategy,
									   &op_lefttype,
									   &op_righttype);

			if (isorderby)
				flags |= SK_ORDER_BY;

			/*
			 * rightop is the constant or variable comparison value
			 */
			rightop = (Expr *) get_rightop(clause);

			if (rightop && IsA(rightop, RelabelType))
				rightop = ((RelabelType *) rightop)->arg;

			Assert(rightop != NULL);

			if (IsA(rightop, Const))
			{
				/* OK, simple constant comparison value */
				scanvalue = ((Const *) rightop)->constvalue;
				if (((Const *) rightop)->constisnull)
					flags |= SK_ISNULL;
			}
			else
			{
				/* Need to treat this one as a runtime key */
				if (n_runtime_keys >= max_runtime_keys)
				{
					/* grow the array: 8 entries to start, then doubling */
					if (max_runtime_keys == 0)
					{
						max_runtime_keys = 8;
						runtime_keys = (IndexRuntimeKeyInfo *)
							palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
					}
					else
					{
						max_runtime_keys *= 2;
						runtime_keys = (IndexRuntimeKeyInfo *)
							repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
					}
				}
				runtime_keys[n_runtime_keys].scan_key = this_scan_key;
				runtime_keys[n_runtime_keys].key_expr =
					ExecInitExpr(rightop, planstate);
				runtime_keys[n_runtime_keys].key_toastable =
					TypeIsToastable(op_righttype);
				n_runtime_keys++;
				/* placeholder; no value is known at this point */
				scanvalue = (Datum) 0;
			}

			/*
			 * initialize the scan key's fields appropriately
			 */
			ScanKeyEntryInitialize(this_scan_key,
								   flags,
								   varattno,	/* attribute number to scan */
								   op_strategy, /* op's strategy */
								   op_righttype,		/* strategy subtype */
								   ((OpExpr *) clause)->inputcollid,	/* collation */
								   opfuncid,	/* reg proc to use */
								   scanvalue);	/* constant */
		}
		else if (IsA(clause, RowCompareExpr))
		{
			/* (indexkey, indexkey, ...) op (expression, expression, ...) */
			RowCompareExpr *rc = (RowCompareExpr *) clause;
			ListCell   *largs_cell = list_head(rc->largs);
			ListCell   *rargs_cell = list_head(rc->rargs);
			ListCell   *opnos_cell = list_head(rc->opnos);
			ListCell   *collids_cell = list_head(rc->inputcollids);
			ScanKey		first_sub_key;
			int			n_sub_key;

			Assert(!isorderby);

			/* one subsidiary key per operator in the row comparison */
			first_sub_key = (ScanKey)
				palloc(list_length(rc->opnos) * sizeof(ScanKeyData));
			n_sub_key = 0;

			/*
			 * Scan RowCompare columns and generate subsidiary ScanKey items.
			 * The four lists are walked in lockstep; only opnos_cell is
			 * tested for the end of the lists.
			 */
			while (opnos_cell != NULL)
			{
				ScanKey		this_sub_key = &first_sub_key[n_sub_key];
				int			flags = SK_ROW_MEMBER;
				Datum		scanvalue;
				Oid			inputcollation;

				/*
				 * leftop should be the index key Var, possibly relabeled
				 */
				leftop = (Expr *) lfirst(largs_cell);
				largs_cell = lnext(largs_cell);

				if (leftop && IsA(leftop, RelabelType))
					leftop = ((RelabelType *) leftop)->arg;

				Assert(leftop != NULL);

				if (!(IsA(leftop, Var) &&
					  ((Var *) leftop)->varno == scanrelid))
					elog(ERROR, "indexqual doesn't have key on left side");

				varattno = ((Var *) leftop)->varattno;

				/*
				 * We have to look up the operator's associated btree support
				 * function
				 */
				opno = lfirst_oid(opnos_cell);
				opnos_cell = lnext(opnos_cell);

				if (index->rd_rel->relam != BTREE_AM_OID ||
					varattno < 1 || varattno > index->rd_index->indnatts)
					elog(ERROR, "bogus RowCompare index qualification");
				opfamily = index->rd_opfamily[varattno - 1];

				get_op_opfamily_properties(opno, opfamily, isorderby,
										   &op_strategy,
										   &op_lefttype,
										   &op_righttype);

				/* every member operator must share the row comparison's type */
				if (op_strategy != rc->rctype)
					elog(ERROR, "RowCompare index qualification contains wrong operator");

				opfuncid = get_opfamily_proc(opfamily,
											 op_lefttype,
											 op_righttype,
											 BTORDER_PROC);

				inputcollation = lfirst_oid(collids_cell);
				collids_cell = lnext(collids_cell);

				/*
				 * rightop is the constant or variable comparison value
				 */
				rightop = (Expr *) lfirst(rargs_cell);
				rargs_cell = lnext(rargs_cell);

				if (rightop && IsA(rightop, RelabelType))
					rightop = ((RelabelType *) rightop)->arg;

				Assert(rightop != NULL);

				if (IsA(rightop, Const))
				{
					/* OK, simple constant comparison value */
					scanvalue = ((Const *) rightop)->constvalue;
					if (((Const *) rightop)->constisnull)
						flags |= SK_ISNULL;
				}
				else
				{
					/* Need to treat this one as a runtime key */
					if (n_runtime_keys >= max_runtime_keys)
					{
						/* same growth policy as in the OpExpr branch */
						if (max_runtime_keys == 0)
						{
							max_runtime_keys = 8;
							runtime_keys = (IndexRuntimeKeyInfo *)
								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
						}
						else
						{
							max_runtime_keys *= 2;
							runtime_keys = (IndexRuntimeKeyInfo *)
								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
						}
					}
					/* the runtime key refers to the subsidiary key, not the header */
					runtime_keys[n_runtime_keys].scan_key = this_sub_key;
					runtime_keys[n_runtime_keys].key_expr =
						ExecInitExpr(rightop, planstate);
					runtime_keys[n_runtime_keys].key_toastable =
						TypeIsToastable(op_righttype);
					n_runtime_keys++;
					scanvalue = (Datum) 0;
				}

				/*
				 * initialize the subsidiary scan key's fields appropriately
				 */
				ScanKeyEntryInitialize(this_sub_key,
									   flags,
									   varattno,		/* attribute number */
									   op_strategy,		/* op's strategy */
									   op_righttype,	/* strategy subtype */
									   inputcollation,	/* collation */
									   opfuncid,		/* reg proc to use */
									   scanvalue);		/* constant */
				n_sub_key++;
			}

			/* Mark the last subsidiary scankey correctly */
			first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END;

			/*
			 * We don't use ScanKeyEntryInitialize for the header because it
			 * isn't going to contain a valid sk_func pointer.
			 */
			MemSet(this_scan_key, 0, sizeof(ScanKeyData));
			this_scan_key->sk_flags = SK_ROW_HEADER;
			this_scan_key->sk_attno = first_sub_key->sk_attno;
			this_scan_key->sk_strategy = rc->rctype;
			/* sk_subtype, sk_collation, sk_func not used in a header */
			this_scan_key->sk_argument = PointerGetDatum(first_sub_key);
		}
		else if (IsA(clause, ScalarArrayOpExpr))
		{
			/* indexkey op ANY (array-expression) */
			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;

			Assert(!isorderby);

			Assert(saop->useOr);
			opno = saop->opno;
			opfuncid = saop->opfuncid;

			/*
			 * leftop should be the index key Var, possibly relabeled
			 */
			leftop = (Expr *) linitial(saop->args);

			if (leftop && IsA(leftop, RelabelType))
				leftop = ((RelabelType *) leftop)->arg;

			Assert(leftop != NULL);

			if (!(IsA(leftop, Var) &&
				  ((Var *) leftop)->varno == scanrelid))
				elog(ERROR, "indexqual doesn't have key on left side");

			varattno = ((Var *) leftop)->varattno;
			if (varattno < 1 || varattno > index->rd_index->indnatts)
				elog(ERROR, "bogus index qualification");

			/*
			 * We have to look up the operator's strategy number.  This
			 * provides a cross-check that the operator does match the index.
			 */
			opfamily = index->rd_opfamily[varattno - 1];

			get_op_opfamily_properties(opno, opfamily, isorderby,
									   &op_strategy,
									   &op_lefttype,
									   &op_righttype);

			/*
			 * rightop is the constant or variable array value
			 */
			rightop = (Expr *) lsecond(saop->args);

			if (rightop && IsA(rightop, RelabelType))
				rightop = ((RelabelType *) rightop)->arg;

			Assert(rightop != NULL);

			/*
			 * NOTE(review): array_keys is filled in here without looking at
			 * the arrayKeys/numArrayKeys arguments, which the header comment
			 * says may be NULL -- how that case is handled is not visible in
			 * this part of the function.
			 */
			array_keys[n_array_keys].scan_key = this_scan_key;
			array_keys[n_array_keys].array_expr =
				ExecInitExpr(rightop, planstate);
			/* the remaining fields were zeroed by palloc0 */
			n_array_keys++;

			/*
			 * initialize the scan key's fields appropriately
			 */
			ScanKeyEntryInitialize(this_scan_key,
								   0,	/* flags */
								   varattno,	/* attribute number to scan */
								   op_strategy, /* op's strategy */
								   op_righttype,		/* strategy subtype */
								   saop->inputcollid,	/* collation */
								   opfuncid,	/* reg proc to use */
								   (Datum) 0);	/* constant */
		}
		else if (IsA(clause, NullTest))
		{
			/* indexkey IS NULL or indexkey IS NOT NULL */
			NullTest   *ntest = (NullTest *) clause;
			int			flags;

			Assert(!isorderby);

			/*
			 * argument should be the index key Var, possibly relabeled
			 */
			leftop = ntest->arg;

			if (leftop && IsA(leftop, RelabelType))
				leftop = ((RelabelType *) leftop)->arg;

			Assert(leftop != NULL);

			if (!(IsA(leftop, Var) &&
				  ((Var *) leftop)->varno == scanrelid))
				elog(ERROR, "NullTest indexqual has wrong key");

			/*
			 * NOTE(review): unlike the other branches, varattno is not
			 * range-checked against indnatts here; it is also never used as
			 * an array subscript in this branch.
			 */
			varattno = ((Var *) leftop)->varattno;

			/*
			 * initialize the scan key's fields appropriately
			 */
			switch (ntest->nulltesttype)
			{
				case IS_NULL:
					flags = SK_ISNULL | SK_SEARCHNULL;
					break;
				case IS_NOT_NULL:
					flags = SK_ISNULL | SK_SEARCHNOTNULL;
					break;
				default:
					elog(ERROR, "unrecognized nulltesttype: %d",
						 (int) ntest->nulltesttype);
					flags = 0;	/* keep compiler quiet */
					break;
			}

			ScanKeyEntryInitialize(this_scan_key,
								   flags,
								   varattno,	/* attribute number to scan */
								   InvalidStrategy,		/* no strategy */
								   InvalidOid,	/* no strategy subtype */
								   InvalidOid,	/* no collation */
								   InvalidOid,	/* no reg proc for this */
								   (Datum) 0);	/* constant */
		}
		else
			elog(ERROR, "unsupported indexqual type: %d",
				 (int) nodeTag(clause));
	}

	/*
	 * NOTE(review): the text of this function as it appears in this file
	 * stops here, right after the qual loop.  Nothing visible stores results
	 * into the output parameters (*scanKeys, *numScanKeys, ...) or closes the
	 * function body -- check against the complete source before relying on
	 * this copy.
	 */
/*
 * lookup_default_opclass
 *
 * Find the default operator class for a (datatype, access method) pair.
 *
 * type_id: OID of the datatype; a domain is replaced by its base type first
 * am_id:   OID of the index access method
 *
 * Returns the opclass OID, or InvalidOid when there is no usable default.
 * Raises an error if more than one default opclass matches the type exactly.
 */
static Oid
lookup_default_opclass(Oid type_id, Oid am_id)
{
	int			exact_hits = 0;
	int			compat_hits = 0;
	Oid			exact_oid = InvalidOid;
	Oid			compat_oid = InvalidOid;
	Relation	opclass_rel;
	ScanKeyData am_key[1];
	SysScanDesc opclass_scan;
	HeapTuple	row;

	/* Domains are resolved to their base type before matching */
	type_id = getBaseType(type_id);

	/*
	 * Walk every opclass belonging to the access method and keep track of
	 * the ones flagged as default whose input type fits the target type,
	 * either exactly or through binary coercion.  An exact fit wins over a
	 * binary-compatible one.
	 *
	 * Several binary-compatible defaults may exist; in that situation no
	 * choice is made and the user has to name the opclass explicitly.
	 * Several exact defaults mean pg_opclass holds bogus entries.
	 *
	 * The logic mirrors GetDefaultOpClass() in indexcmds.c, but all
	 * opclasses are considered here, whatever the current search path is.
	 */
	opclass_rel = heap_openr(OperatorClassRelationName, AccessShareLock);

	ScanKeyEntryInitialize(&am_key[0], 0x0,
						   Anum_pg_opclass_opcamid, F_OIDEQ,
						   ObjectIdGetDatum(am_id));

	opclass_scan = systable_beginscan(opclass_rel, OpclassAmNameNspIndex, true,
									  SnapshotNow, 1, am_key);

	for (row = systable_getnext(opclass_scan);
		 HeapTupleIsValid(row);
		 row = systable_getnext(opclass_scan))
	{
		Form_pg_opclass candidate = (Form_pg_opclass) GETSTRUCT(row);

		/* only opclasses marked as default are of interest */
		if (!candidate->opcdefault)
			continue;

		if (candidate->opcintype == type_id)
		{
			exact_hits++;
			exact_oid = HeapTupleGetOid(row);
		}
		else if (IsBinaryCoercible(type_id, candidate->opcintype))
		{
			compat_hits++;
			compat_oid = HeapTupleGetOid(row);
		}
	}

	systable_endscan(opclass_scan);
	heap_close(opclass_rel, AccessShareLock);

	/* A single exact match settles it */
	if (exact_hits == 1)
		return exact_oid;

	/* More than one exact match is a catalog inconsistency */
	if (exact_hits != 0)
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("there are multiple default operator classes for data type %s",
						format_type_be(type_id))));

	/* Otherwise accept a binary-compatible match only if it is unique */
	return (compat_hits == 1) ? compat_oid : InvalidOid;
}