void
initSpGistState(SpGistState *state, Relation index)
{
    RegProcedure propOid;

    Assert(index->rd_att->natts == 1);

    propOid = index_getprocid(index, 1, SPGIST_PROP_PROC);

    state->prop = *(SpGistOpClassProp *)
        DatumGetPointer(OidFunctionCall0Coll(propOid, InvalidOid));

    fillTypeDesc(&state->attType, state->prop.leafType);
    fillTypeDesc(&state->attNodeType, state->prop.nodeType);
    fillTypeDesc(&state->attPrefixType, state->prop.prefixType);

    fmgr_info_copy(&(state->chooseFn),
                   index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC),
                   CurrentMemoryContext);
    fmgr_info_copy(&(state->picksplitFn),
                   index_getprocinfo(index, 1, SPGIST_PICKSPLIT_PROC),
                   CurrentMemoryContext);
    fmgr_info_copy(&(state->leafConsistentFn),
                   index_getprocinfo(index, 1, SPGIST_LEAFCONS_PROC),
                   CurrentMemoryContext);
    fmgr_info_copy(&(state->innerConsistentFn),
                   index_getprocinfo(index, 1, SPGIST_INNERCONS_PROC),
                   CurrentMemoryContext);

    state->nodeTupDesc = CreateTemplateTupleDesc(1, false);
    TupleDescInitEntry(state->nodeTupDesc, (AttrNumber) 1, NULL,
                       state->attNodeType.type, -1, 0);
}
static void
initRtstate(RTSTATE *rtstate, Relation index)
{
    fmgr_info_copy(&rtstate->unionFn,
                   index_getprocinfo(index, 1, RT_UNION_PROC),
                   CurrentMemoryContext);
    fmgr_info_copy(&rtstate->sizeFn,
                   index_getprocinfo(index, 1, RT_SIZE_PROC),
                   CurrentMemoryContext);
    fmgr_info_copy(&rtstate->interFn,
                   index_getprocinfo(index, 1, RT_INTER_PROC),
                   CurrentMemoryContext);
}
void
initGinState(GinState *state, Relation index)
{
    int         i;

    state->origTupdesc = index->rd_att;

    state->oneCol = (index->rd_att->natts == 1) ? true : false;

    for (i = 0; i < index->rd_att->natts; i++)
    {
        state->tupdesc[i] = CreateTemplateTupleDesc(2, false);

        TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
                           INT2OID, -1, 0);
        TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
                           index->rd_att->attrs[i]->atttypid,
                           index->rd_att->attrs[i]->atttypmod,
                           index->rd_att->attrs[i]->attndims);

        fmgr_info_copy(&(state->compareFn[i]),
                       index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(state->extractValueFn[i]),
                       index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(state->extractQueryFn[i]),
                       index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(state->consistentFn[i]),
                       index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
                       CurrentMemoryContext);

        /*
         * Check opclass capability to do partial match.
         */
        if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
        {
            fmgr_info_copy(&(state->comparePartialFn[i]),
                           index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
                           CurrentMemoryContext);
            state->canPartialMatch[i] = true;
        }
        else
        {
            state->canPartialMatch[i] = false;
        }
    }
}
/*
 * Given two deformed tuples, adjust the first one so that it's consistent
 * with the summary values in both.
 */
static void
union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
{
    int         keyno;
    BrinMemTuple *db;
    MemoryContext cxt;
    MemoryContext oldcxt;

    /* Use our own memory context to avoid retail pfree */
    cxt = AllocSetContextCreate(CurrentMemoryContext,
                                "brin union",
                                ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(cxt);
    db = brin_deform_tuple(bdesc, b);
    MemoryContextSwitchTo(oldcxt);

    for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
    {
        FmgrInfo   *unionFn;
        BrinValues *col_a = &a->bt_columns[keyno];
        BrinValues *col_b = &db->bt_columns[keyno];

        unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
                                    BRIN_PROCNUM_UNION);
        FunctionCall3Coll(unionFn,
                          bdesc->bd_index->rd_indcollation[keyno],
                          PointerGetDatum(bdesc),
                          PointerGetDatum(col_a),
                          PointerGetDatum(col_b));
    }

    MemoryContextDelete(cxt);
}
/*
 * _bt_mkscankey_nodata
 *      Build an insertion scan key that contains 3-way comparator routines
 *      appropriate to the key datatypes, but no comparison data.  The
 *      comparison data ultimately used must match the key datatypes.
 *
 *      The result cannot be used with _bt_compare(), unless comparison
 *      data is first stored into the key entries.  Currently this
 *      routine is only called by nbtsort.c and tuplesort.c, which have
 *      their own comparison routines.
 */
ScanKey
_bt_mkscankey_nodata(Relation rel)
{
    ScanKey     skey;
    int         natts;
    int16      *indoption;
    int         i;

    natts = RelationGetNumberOfAttributes(rel);
    indoption = rel->rd_indoption;

    skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));

    for (i = 0; i < natts; i++)
    {
        FmgrInfo   *procinfo;
        int         flags;

        /*
         * We can use the cached (default) support procs since no cross-type
         * comparison can be needed.
         */
        procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
        flags = SK_ISNULL | (indoption[i] << SK_BT_INDOPTION_SHIFT);
        ScanKeyEntryInitializeWithInfo(&skey[i],
                                       flags,
                                       (AttrNumber) (i + 1),
                                       InvalidStrategy,
                                       InvalidOid,
                                       procinfo,
                                       (Datum) 0);
    }

    return skey;
}
/*
 * bt_mk_scankey_nodata
 *      Build an insertion scan key that contains 3-way comparator routines
 *      appropriate to the key datatypes, but no comparison data.  The
 *      comparison data ultimately used must match the key datatypes.
 *
 *      The result cannot be used with bt_compare(), unless comparison
 *      data is first stored into the key entries.  Currently this
 *      routine is only called by nbtsort.c and tuplesort.c, which have
 *      their own comparison routines.
 */
struct scankey *
bt_mk_scankey_nodata(struct relation *rel)
{
    struct scankey *skey;
    int         natts;
    int16      *indoption;
    int         i;

    natts = REL_GET_NR_ATTR(rel);
    indoption = rel->rd_indoption;

    skey = (struct scankey *) palloc(natts * sizeof(struct scankey));

    for (i = 0; i < natts; i++)
    {
        struct fmgr_info *procinfo;
        int         flags;

        /*
         * We can use the cached (default) support procs since no cross-type
         * comparison can be needed.
         */
        procinfo = index_getprocinfo(rel, i + 1, BT_ORDER_PROC);
        flags = SK_ISNULL | (indoption[i] << SK_BT_INDEX_OPT_SHIFT);
        scankey_init_info(&skey[i],
                          flags,
                          (attr_nr_t) (i + 1),
                          INVALID_STRAT,
                          INVALID_OID,
                          rel->rd_indcollation[i],
                          procinfo,
                          (datum_t) 0);
    }

    return skey;
}
/*
 * Per-heap-tuple callback for IndexBuildHeapScan.
 *
 * Note we don't worry about the page range at the end of the table here; it
 * is present in the build state struct after we're called the last time, but
 * not inserted into the index.  Caller must ensure to do so, if appropriate.
 */
static void
brinbuildCallback(Relation index,
                  HeapTuple htup,
                  Datum *values,
                  bool *isnull,
                  bool tupleIsAlive,
                  void *brstate)
{
    BrinBuildState *state = (BrinBuildState *) brstate;
    BlockNumber thisblock;
    int         i;

    thisblock = ItemPointerGetBlockNumber(&htup->t_self);

    /*
     * If we're in a block that belongs to a future range, summarize what
     * we've got and start afresh.  Note the scan might have skipped many
     * pages, if they were devoid of live tuples; make sure to insert index
     * tuples for those too.
     */
    while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
    {
        BRIN_elog((DEBUG2,
                   "brinbuildCallback: completed a range: %u--%u",
                   state->bs_currRangeStart,
                   state->bs_currRangeStart + state->bs_pagesPerRange));

        /* create the index tuple and insert it */
        form_and_insert_tuple(state);

        /* set state to correspond to the next range */
        state->bs_currRangeStart += state->bs_pagesPerRange;

        /* re-initialize state for it */
        brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
    }

    /* Accumulate the current tuple into the running state */
    for (i = 0; i < state->bs_bdesc->bd_tupdesc->natts; i++)
    {
        FmgrInfo   *addValue;
        BrinValues *col;
        Form_pg_attribute attr = TupleDescAttr(state->bs_bdesc->bd_tupdesc, i);

        col = &state->bs_dtuple->bt_columns[i];
        addValue = index_getprocinfo(index, i + 1,
                                     BRIN_PROCNUM_ADDVALUE);

        /*
         * Update dtuple state, if and as necessary.
         */
        FunctionCall4Coll(addValue,
                          attr->attcollation,
                          PointerGetDatum(state->bs_bdesc),
                          PointerGetDatum(col),
                          values[i], isnull[i]);
    }
}
/*
 * Build a BrinDesc used to create or scan a BRIN index
 */
BrinDesc *
brin_build_desc(Relation rel)
{
    BrinOpcInfo **opcinfo;
    BrinDesc   *bdesc;
    TupleDesc   tupdesc;
    int         totalstored = 0;
    int         keyno;
    long        totalsize;
    MemoryContext cxt;
    MemoryContext oldcxt;

    cxt = AllocSetContextCreate(CurrentMemoryContext,
                                "brin desc cxt",
                                ALLOCSET_SMALL_INITSIZE,
                                ALLOCSET_SMALL_MINSIZE,
                                ALLOCSET_SMALL_MAXSIZE);
    oldcxt = MemoryContextSwitchTo(cxt);
    tupdesc = RelationGetDescr(rel);

    /*
     * Obtain BrinOpcInfo for each indexed column.  While at it, accumulate
     * the number of columns stored, since the number is opclass-defined.
     */
    opcinfo = (BrinOpcInfo **) palloc(sizeof(BrinOpcInfo *) * tupdesc->natts);
    for (keyno = 0; keyno < tupdesc->natts; keyno++)
    {
        FmgrInfo   *opcInfoFn;

        opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO);

        opcinfo[keyno] = (BrinOpcInfo *)
            DatumGetPointer(FunctionCall1(opcInfoFn,
                                          tupdesc->attrs[keyno]->atttypid));
        totalstored += opcinfo[keyno]->oi_nstored;
    }

    /* Allocate our result struct and fill it in */
    totalsize = offsetof(BrinDesc, bd_info) +
        sizeof(BrinOpcInfo *) * tupdesc->natts;

    bdesc = palloc(totalsize);
    bdesc->bd_context = cxt;
    bdesc->bd_index = rel;
    bdesc->bd_tupdesc = tupdesc;
    bdesc->bd_disktdesc = NULL; /* generated lazily */
    bdesc->bd_totalstored = totalstored;

    for (keyno = 0; keyno < tupdesc->natts; keyno++)
        bdesc->bd_info[keyno] = opcinfo[keyno];
    pfree(opcinfo);

    MemoryContextSwitchTo(oldcxt);

    return bdesc;
}
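/*
 * Usage sketch (not part of the original file): the typical lifecycle of a
 * BrinDesc built by brin_build_desc() above.  "idxRel" is assumed to be an
 * already-opened, locked BRIN index Relation; brin_free_desc() is the
 * matching cleanup routine, which destroys the private memory context
 * created in brin_build_desc().
 */
static void
example_brin_desc_lifecycle(Relation idxRel)
{
    BrinDesc   *bdesc = brin_build_desc(idxRel);

    /* ... invoke opclass support procs through bdesc->bd_info[] ... */

    brin_free_desc(bdesc);
}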
/*
 * _hash_datum2hashkey -- given a Datum, call the index's hash procedure
 *
 * The Datum is assumed to be of the index's column type, so we can use the
 * "primary" hash procedure that's tracked for us by the generic index code.
 */
uint32
_hash_datum2hashkey(Relation rel, Datum key)
{
    FmgrInfo   *procinfo;

    /* XXX assumes index has only one attribute */
    procinfo = index_getprocinfo(rel, 1, HASHPROC);

    return DatumGetUInt32(FunctionCall1(procinfo, key));
}
/*
 * _hash_datum2hashkey -- given a Datum, call the index's hash procedure
 *
 * The Datum is assumed to be of the index's column type, so we can use the
 * "primary" hash procedure that's tracked for us by the generic index code.
 */
uint32
_hash_datum2hashkey(Relation rel, Datum key)
{
    FmgrInfo   *procinfo;
    Oid         collation;

    /* XXX assumes index has only one attribute */
    procinfo = index_getprocinfo(rel, 1, HASHPROC);
    collation = rel->rd_indcollation[0];

    return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
}
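/*
 * Usage sketch (assumption, not original code): push a C string through the
 * index's hash opclass via the collation-aware _hash_datum2hashkey() above.
 * Assumes "rel" is a one-column hash index over a text column.
 */
static uint32
example_hash_text_key(Relation rel, const char *keystr)
{
    Datum       key = CStringGetTextDatum(keystr);

    return _hash_datum2hashkey(rel, key);
}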
/*
 * Fill BloomState structure for particular index.
 */
void
initBloomState(BloomState *state, Relation index)
{
    int         i;

    state->nColumns = index->rd_att->natts;

    /* Initialize hash function for each attribute */
    for (i = 0; i < index->rd_att->natts; i++)
    {
        fmgr_info_copy(&(state->hashFn[i]),
                       index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
                       CurrentMemoryContext);
        state->collations[i] = index->rd_indcollation[i];
    }

    /* Initialize amcache if needed with options from metapage */
    if (!index->rd_amcache)
    {
        Buffer      buffer;
        Page        page;
        BloomMetaPageData *meta;
        BloomOptions *opts;

        opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));

        buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
        LockBuffer(buffer, BUFFER_LOCK_SHARE);

        page = BufferGetPage(buffer);

        if (!BloomPageIsMeta(page))
            elog(ERROR, "Relation is not a bloom index");
        meta = BloomPageGetMeta(BufferGetPage(buffer));

        if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
            elog(ERROR, "Relation is not a bloom index");

        *opts = meta->opts;

        UnlockReleaseBuffer(buffer);

        index->rd_amcache = (void *) opts;
    }

    memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
    state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
        sizeof(BloomSignatureWord) * state->opts.bloomLength;
}
void
initBloomState(BloomState *state, Relation index)
{
    int         i;

    state->nColumns = index->rd_att->natts;

    for (i = 0; i < index->rd_att->natts; i++)
    {
        fmgr_info_copy(&(state->hashFn[i]),
                       index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
                       CurrentMemoryContext);
    }

    if (!index->rd_amcache)
    {
        Buffer      buffer;
        BloomMetaPageData *meta;
        BloomOptions *opts;

        opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));

        buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
        LockBuffer(buffer, BUFFER_LOCK_SHARE);

        if (!BloomPageIsMeta(BufferGetPage(buffer)))
            elog(ERROR, "Relation is not a bloom index");
        meta = BloomPageGetMeta(BufferGetPage(buffer));

        if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
            elog(ERROR, "Relation is not a bloom index");

        *opts = meta->opts;

        UnlockReleaseBuffer(buffer);

        index->rd_amcache = (void *) opts;
    }

    state->opts = (BloomOptions *) index->rd_amcache;
    state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
        sizeof(SignType) * state->opts->bloomLength;
}
void
initGISTstate(GISTSTATE *giststate, Relation index)
{
    int         i;

    if (index->rd_att->natts > INDEX_MAX_KEYS)
        elog(ERROR, "numberOfAttributes %d > %d",
             index->rd_att->natts, INDEX_MAX_KEYS);

    giststate->tupdesc = index->rd_att;

    for (i = 0; i < index->rd_att->natts; i++)
    {
        fmgr_info_copy(&(giststate->consistentFn[i]),
                       index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(giststate->unionFn[i]),
                       index_getprocinfo(index, i + 1, GIST_UNION_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(giststate->compressFn[i]),
                       index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(giststate->decompressFn[i]),
                       index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(giststate->penaltyFn[i]),
                       index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(giststate->picksplitFn[i]),
                       index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(giststate->equalFn[i]),
                       index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(giststate->distanceFn[i]),
                       index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
                       CurrentMemoryContext);
    }
}
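/*
 * Usage sketch (assumption): GiST entry points fill a GISTSTATE once per
 * operation, then drive the per-column support functions through it.
 */
static void
example_init_gist(Relation index)
{
    GISTSTATE   giststate;

    initGISTstate(&giststate, index);
    /* giststate.consistentFn[0] etc. are now ready for FunctionCall* use */
}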
/*
 * _bt_mkscankey
 *      Build an insertion scan key that contains comparison data from itup
 *      as well as comparator routines appropriate to the key datatypes.
 *
 *      The result is intended for use with _bt_compare().
 */
ScanKey
_bt_mkscankey(Relation rel, IndexTuple itup)
{
    ScanKey     skey;
    TupleDesc   itupdesc;
    int         natts;
    int16      *indoption;
    int         i;

    itupdesc = RelationGetDescr(rel);
    natts = RelationGetNumberOfAttributes(rel);
    indoption = rel->rd_indoption;

    skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));

    for (i = 0; i < natts; i++)
    {
        FmgrInfo   *procinfo;
        Datum       arg;
        bool        null;
        int         flags;

        /*
         * We can use the cached (default) support procs since no cross-type
         * comparison can be needed.
         */
        procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
        arg = index_getattr(itup, i + 1, itupdesc, &null);
        flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
        ScanKeyEntryInitializeWithInfo(&skey[i],
                                       flags,
                                       (AttrNumber) (i + 1),
                                       InvalidStrategy,
                                       InvalidOid,
                                       rel->rd_indcollation[i],
                                       procinfo,
                                       arg);
    }

    return skey;
}
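/*
 * Usage sketch (assumption, not original code): build an insertion scan key
 * from an index tuple with _bt_mkscankey() above, use it, and release it
 * with its nbtutils.c counterpart _bt_freeskey().
 */
static void
example_insertion_scankey(Relation rel, IndexTuple itup)
{
    ScanKey     itup_scankey = _bt_mkscankey(rel, itup);

    /* ... e.g. hand to _bt_search()/_bt_compare() to locate the tuple ... */

    _bt_freeskey(itup_scankey);
}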
/*
 * bt_mk_scankey
 *      Build an insertion scan key that contains comparison data from itup
 *      as well as comparator routines appropriate to the key datatypes.
 *
 *      The result is intended for use with bt_compare().
 */
struct scankey *
bt_mk_scankey(struct relation *rel, struct index_tuple *itup)
{
    struct scankey *skey;
    struct tuple *itupdesc;
    int         natts;
    int16      *indoption;
    int         i;

    itupdesc = REL_DESC(rel);
    natts = REL_GET_NR_ATTR(rel);
    indoption = rel->rd_indoption;

    skey = (struct scankey *) palloc(natts * sizeof(struct scankey));

    for (i = 0; i < natts; i++)
    {
        struct fmgr_info *procinfo;
        datum_t     arg;
        bool        null;
        int         flags;

        /*
         * We can use the cached (default) support procs since no cross-type
         * comparison can be needed.
         */
        procinfo = index_getprocinfo(rel, i + 1, BT_ORDER_PROC);
        arg = index_getattr(itup, i + 1, itupdesc, &null);
        flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDEX_OPT_SHIFT);
        scankey_init_info(&skey[i],
                          flags,
                          (attr_nr_t) (i + 1),
                          INVALID_STRAT,
                          INVALID_OID,
                          rel->rd_indcollation[i],
                          procinfo,
                          arg);
    }

    return skey;
}
/*
 * initGinState: fill in an empty GinState struct to describe the index
 *
 * Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
 */
void
initGinState(GinState *state, Relation index)
{
    TupleDesc   origTupdesc = RelationGetDescr(index);
    int         i;

    MemSet(state, 0, sizeof(GinState));

    state->index = index;
    state->oneCol = (origTupdesc->natts == 1) ? true : false;
    state->origTupdesc = origTupdesc;

    for (i = 0; i < origTupdesc->natts; i++)
    {
        if (state->oneCol)
            state->tupdesc[i] = state->origTupdesc;
        else
        {
            state->tupdesc[i] = CreateTemplateTupleDesc(2, false);

            TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
                               INT2OID, -1, 0);
            TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
                               origTupdesc->attrs[i]->atttypid,
                               origTupdesc->attrs[i]->atttypmod,
                               origTupdesc->attrs[i]->attndims);
            TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
                                        origTupdesc->attrs[i]->attcollation);
        }

        /*
         * If the compare proc isn't specified in the opclass definition, look
         * up the index key type's default btree comparator.
         */
        if (index_getprocid(index, i + 1, GIN_COMPARE_PROC) != InvalidOid)
        {
            fmgr_info_copy(&(state->compareFn[i]),
                           index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
                           CurrentMemoryContext);
        }
        else
        {
            TypeCacheEntry *typentry;

            typentry = lookup_type_cache(origTupdesc->attrs[i]->atttypid,
                                         TYPECACHE_CMP_PROC_FINFO);
            if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_FUNCTION),
                         errmsg("could not identify a comparison function for type %s",
                                format_type_be(origTupdesc->attrs[i]->atttypid))));
            fmgr_info_copy(&(state->compareFn[i]),
                           &(typentry->cmp_proc_finfo),
                           CurrentMemoryContext);
        }

        /* Opclass must always provide extract procs */
        fmgr_info_copy(&(state->extractValueFn[i]),
                       index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(state->extractQueryFn[i]),
                       index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
                       CurrentMemoryContext);

        /*
         * Check opclass capability to do tri-state or binary logic consistent
         * check.
         */
        if (index_getprocid(index, i + 1, GIN_TRICONSISTENT_PROC) != InvalidOid)
        {
            fmgr_info_copy(&(state->triConsistentFn[i]),
                           index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC),
                           CurrentMemoryContext);
        }

        if (index_getprocid(index, i + 1, GIN_CONSISTENT_PROC) != InvalidOid)
        {
            fmgr_info_copy(&(state->consistentFn[i]),
                           index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
                           CurrentMemoryContext);
        }

        if (state->consistentFn[i].fn_oid == InvalidOid &&
            state->triConsistentFn[i].fn_oid == InvalidOid)
        {
            elog(ERROR, "missing GIN support function (%d or %d) for attribute %d of index \"%s\"",
                 GIN_CONSISTENT_PROC, GIN_TRICONSISTENT_PROC,
                 i + 1, RelationGetRelationName(index));
        }

        /*
         * Check opclass capability to do partial match.
         */
        if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
        {
            fmgr_info_copy(&(state->comparePartialFn[i]),
                           index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
                           CurrentMemoryContext);
            state->canPartialMatch[i] = true;
        }
        else
        {
            state->canPartialMatch[i] = false;
        }

        /*
         * If the index column has a specified collation, we should honor that
         * while doing comparisons.  However, we may have a collatable storage
         * type for a noncollatable indexed data type (for instance, hstore
         * uses text index entries).  If there's no index collation then
         * specify default collation in case the support functions need
         * collation.  This is harmless if the support functions don't care
         * about collation, so we just do it unconditionally.  (We could
         * alternatively call get_typcollation, but that seems like expensive
         * overkill --- there aren't going to be any cases where a GIN storage
         * type has a nondefault collation.)
         */
        if (OidIsValid(index->rd_indcollation[i]))
            state->supportCollation[i] = index->rd_indcollation[i];
        else
            state->supportCollation[i] = DEFAULT_COLLATION_OID;
    }
}
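/*
 * Usage sketch (assumption, modeled on GIN's entry extraction): call the
 * extractValue support proc for column 0 of a GinState initialized by the
 * function above, passing the per-column support collation it computed.
 */
static Datum *
example_extract_entries(GinState *ginstate, Datum value, int32 *nentries)
{
    return (Datum *)
        DatumGetPointer(FunctionCall2Coll(&ginstate->extractValueFn[0],
                                          ginstate->supportCollation[0],
                                          value,
                                          PointerGetDatum(nentries)));
}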
/*
 * initGinState: fill in an empty GinState struct to describe the index
 *
 * Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
 */
void
initGinState(GinState *state, Relation index)
{
    TupleDesc   origTupdesc = RelationGetDescr(index);
    int         i;

    MemSet(state, 0, sizeof(GinState));

    state->index = index;
    state->oneCol = (origTupdesc->natts == 1) ? true : false;
    state->origTupdesc = origTupdesc;

    for (i = 0; i < origTupdesc->natts; i++)
    {
        if (state->oneCol)
            state->tupdesc[i] = state->origTupdesc;
        else
        {
            state->tupdesc[i] = CreateTemplateTupleDesc(2, false);

            TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
                               INT2OID, -1, 0);
            TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
                               origTupdesc->attrs[i]->atttypid,
                               origTupdesc->attrs[i]->atttypmod,
                               origTupdesc->attrs[i]->attndims);
            TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
                                        origTupdesc->attrs[i]->attcollation);
        }

        fmgr_info_copy(&(state->compareFn[i]),
                       index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
                       CurrentMemoryContext);

        /*
         * If the index column has a specified collation, index_getprocinfo
         * will have installed it into the fmgr info, and we should honor it.
         * However, we may have a collatable storage type for a noncollatable
         * indexed data type (for instance, hstore uses text index entries).
         * If there's no index collation then specify default collation in
         * case the comparison function needs one.  This is harmless if the
         * comparison function doesn't care about collation, so we just do it
         * unconditionally.  (We could alternatively call get_typcollation,
         * but that seems like expensive overkill --- there aren't going to be
         * any cases where a GIN storage type has a nondefault collation.)
         */
        if (!OidIsValid(state->compareFn[i].fn_collation))
            fmgr_info_set_collation(DEFAULT_COLLATION_OID,
                                    &(state->compareFn[i]));

        fmgr_info_copy(&(state->extractValueFn[i]),
                       index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(state->extractQueryFn[i]),
                       index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
                       CurrentMemoryContext);
        fmgr_info_copy(&(state->consistentFn[i]),
                       index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
                       CurrentMemoryContext);

        /*
         * Check opclass capability to do partial match.
         */
        if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
        {
            fmgr_info_copy(&(state->comparePartialFn[i]),
                           index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
                           CurrentMemoryContext);
            /* As above, install collation spec in case compare fn needs it */
            if (!OidIsValid(state->comparePartialFn[i].fn_collation))
                fmgr_info_set_collation(DEFAULT_COLLATION_OID,
                                        &(state->comparePartialFn[i]));
            state->canPartialMatch[i] = true;
        }
        else
        {
            state->canPartialMatch[i] = false;
        }
    }
}
/*
 * Execute the index scan.
 *
 * This works by reading index TIDs from the revmap, and obtaining the index
 * tuples pointed to by them; the summary values in the index tuples are
 * compared to the scan keys.  We return into the TID bitmap all the pages in
 * ranges corresponding to index tuples that match the scan keys.
 *
 * If a TID from the revmap is read as InvalidTID, we know that range is
 * unsummarized.  Pages in those ranges need to be returned regardless of scan
 * keys.
 */
int64
bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
    Relation    idxRel = scan->indexRelation;
    Buffer      buf = InvalidBuffer;
    BrinDesc   *bdesc;
    Oid         heapOid;
    Relation    heapRel;
    BrinOpaque *opaque;
    BlockNumber nblocks;
    BlockNumber heapBlk;
    int         totalpages = 0;
    FmgrInfo   *consistentFn;
    MemoryContext oldcxt;
    MemoryContext perRangeCxt;

    opaque = (BrinOpaque *) scan->opaque;
    bdesc = opaque->bo_bdesc;
    pgstat_count_index_scan(idxRel);

    /*
     * We need to know the size of the table so that we know how long to
     * iterate on the revmap.
     */
    heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
    heapRel = heap_open(heapOid, AccessShareLock);
    nblocks = RelationGetNumberOfBlocks(heapRel);
    heap_close(heapRel, AccessShareLock);

    /*
     * Make room for the consistent support procedures of indexed columns.
     * We don't look them up here; we do that lazily the first time we see a
     * scan key reference each of them.  We rely on zeroing fn_oid to
     * InvalidOid.
     */
    consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);

    /*
     * Set up and use a per-range memory context, which is reset every time
     * we loop below.  This avoids having to free the tuples within the loop.
     */
    perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
                                        "bringetbitmap cxt",
                                        ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(perRangeCxt);

    /*
     * Now scan the revmap.  We start by querying for heap page 0,
     * incrementing by the number of pages per range; this gives us a full
     * view of the table.
     */
    for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
    {
        bool        addrange;
        BrinTuple  *tup;
        OffsetNumber off;
        Size        size;

        CHECK_FOR_INTERRUPTS();

        MemoryContextResetAndDeleteChildren(perRangeCxt);

        tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
                                       &off, &size, BUFFER_LOCK_SHARE,
                                       scan->xs_snapshot);
        if (tup)
        {
            tup = brin_copy_tuple(tup, size);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        /*
         * For page ranges with no indexed tuple, we must return the whole
         * range; otherwise, compare it to the scan keys.
         */
        if (tup == NULL)
        {
            addrange = true;
        }
        else
        {
            BrinMemTuple *dtup;

            dtup = brin_deform_tuple(bdesc, tup);
            if (dtup->bt_placeholder)
            {
                /*
                 * Placeholder tuples are always returned, regardless of the
                 * values stored in them.
                 */
                addrange = true;
            }
            else
            {
                int         keyno;

                /*
                 * Compare scan keys with summary values stored for the
                 * range.  If scan keys are matched, the page range must be
                 * added to the bitmap.  We initially assume the range needs
                 * to be added; in particular this serves the case where
                 * there are no keys.
                 */
                addrange = true;
                for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
                {
                    ScanKey     key = &scan->keyData[keyno];
                    AttrNumber  keyattno = key->sk_attno;
                    BrinValues *bval = &dtup->bt_columns[keyattno - 1];
                    Datum       add;

                    /*
                     * The collation of the scan key must match the collation
                     * used in the index column (but only if the search is
                     * not IS NULL / IS NOT NULL).  Otherwise we shouldn't be
                     * using this index ...
                     */
                    Assert((key->sk_flags & SK_ISNULL) ||
                           (key->sk_collation ==
                            bdesc->bd_tupdesc->attrs[keyattno - 1]->attcollation));

                    /* First time this column? look up consistent function */
                    if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
                    {
                        FmgrInfo   *tmp;

                        tmp = index_getprocinfo(idxRel, keyattno,
                                                BRIN_PROCNUM_CONSISTENT);
                        fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
                                       CurrentMemoryContext);
                    }

                    /*
                     * Check whether the scan key is consistent with the page
                     * range values; if so, have the pages in the range added
                     * to the output bitmap.
                     *
                     * When there are multiple scan keys, failure to meet the
                     * criteria for a single one of them is enough to discard
                     * the range as a whole, so break out of the loop as soon
                     * as a false return value is obtained.
                     */
                    add = FunctionCall3Coll(&consistentFn[keyattno - 1],
                                            key->sk_collation,
                                            PointerGetDatum(bdesc),
                                            PointerGetDatum(bval),
                                            PointerGetDatum(key));
                    addrange = DatumGetBool(add);
                    if (!addrange)
                        break;
                }
            }
        }

        /* add the pages in the range to the output bitmap, if needed */
        if (addrange)
        {
            BlockNumber pageno;

            for (pageno = heapBlk;
                 pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
                 pageno++)
            {
                MemoryContextSwitchTo(oldcxt);
                tbm_add_page(tbm, pageno);
                totalpages++;
                MemoryContextSwitchTo(perRangeCxt);
            }
        }
    }

    MemoryContextSwitchTo(oldcxt);
    MemoryContextDelete(perRangeCxt);

    if (buf != InvalidBuffer)
        ReleaseBuffer(buf);

    /*
     * XXX We have an approximation of the number of *pages* that our scan
     * returns, but we don't have a precise idea of the number of heap tuples
     * involved.
     */
    return totalpages * 10;
}
/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL for
 * it), there's nothing to do.
 */
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
           ItemPointer heaptid, Relation heapRel,
           IndexUniqueCheck checkUnique)
{
    BlockNumber pagesPerRange;
    BrinDesc   *bdesc = NULL;
    BrinRevmap *revmap;
    Buffer      buf = InvalidBuffer;
    MemoryContext tupcxt = NULL;
    MemoryContext oldcxt = NULL;

    revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);

    for (;;)
    {
        bool        need_insert = false;
        OffsetNumber off;
        BrinTuple  *brtup;
        BrinMemTuple *dtup;
        BlockNumber heapBlk;
        int         keyno;

        CHECK_FOR_INTERRUPTS();

        heapBlk = ItemPointerGetBlockNumber(heaptid);
        /* normalize the block number to be the first block in the range */
        heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
        brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
                                         BUFFER_LOCK_SHARE, NULL);

        /* if range is unsummarized, there's nothing to do */
        if (!brtup)
            break;

        /* First time through? */
        if (bdesc == NULL)
        {
            bdesc = brin_build_desc(idxRel);
            tupcxt = AllocSetContextCreate(CurrentMemoryContext,
                                           "brininsert cxt",
                                           ALLOCSET_DEFAULT_SIZES);
            oldcxt = MemoryContextSwitchTo(tupcxt);
        }

        dtup = brin_deform_tuple(bdesc, brtup);

        /*
         * Compare the key values of the new tuple to the stored index
         * values; our deformed tuple will get updated if the new tuple
         * doesn't fit the original range (note this means we can't break out
         * of the loop early).  Make a note of whether this happens, so that
         * we know to insert the modified tuple later.
         */
        for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
        {
            Datum       result;
            BrinValues *bval;
            FmgrInfo   *addValue;

            bval = &dtup->bt_columns[keyno];
            addValue = index_getprocinfo(idxRel, keyno + 1,
                                         BRIN_PROCNUM_ADDVALUE);
            result = FunctionCall4Coll(addValue,
                                       idxRel->rd_indcollation[keyno],
                                       PointerGetDatum(bdesc),
                                       PointerGetDatum(bval),
                                       values[keyno],
                                       nulls[keyno]);
            /* if that returned true, we need to insert the updated tuple */
            need_insert |= DatumGetBool(result);
        }

        if (!need_insert)
        {
            /*
             * The tuple is consistent with the new values, so there's
             * nothing to do.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }
        else
        {
            Page        page = BufferGetPage(buf);
            ItemId      lp = PageGetItemId(page, off);
            Size        origsz;
            BrinTuple  *origtup;
            Size        newsz;
            BrinTuple  *newtup;
            bool        samepage;

            /*
             * Make a copy of the old tuple, so that we can compare it after
             * re-acquiring the lock.
             */
            origsz = ItemIdGetLength(lp);
            origtup = brin_copy_tuple(brtup, origsz);

            /*
             * Before releasing the lock, check if we can attempt a same-page
             * update.  Another process could insert a tuple concurrently in
             * the same page though, so downstream we must be prepared to
             * cope if this turns out to not be possible after all.
             */
            newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
            samepage = brin_can_do_samepage_update(buf, origsz, newsz);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /*
             * Try to update the tuple.  If this doesn't work for whatever
             * reason, we need to restart from the top; the revmap might be
             * pointing at a different tuple for this block now, so we need
             * to recompute to ensure both our new heap tuple and the other
             * inserter's are covered by the combined tuple.  It might be
             * that we don't need to update at all.
             */
            if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
                               buf, off, origtup, origsz, newtup, newsz,
                               samepage))
            {
                /* no luck; start over */
                MemoryContextResetAndDeleteChildren(tupcxt);
                continue;
            }
        }

        /* success! */
        break;
    }

    brinRevmapTerminate(revmap);
    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
    if (bdesc != NULL)
    {
        brin_free_desc(bdesc);
        MemoryContextSwitchTo(oldcxt);
        MemoryContextDelete(tupcxt);
    }

    return false;
}
/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL for
 * it), there's nothing to do.
 */
Datum
brininsert(PG_FUNCTION_ARGS)
{
    Relation    idxRel = (Relation) PG_GETARG_POINTER(0);
    Datum      *values = (Datum *) PG_GETARG_POINTER(1);
    bool       *nulls = (bool *) PG_GETARG_POINTER(2);
    ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3);

    /* we ignore the rest of our arguments */
    BlockNumber pagesPerRange;
    BrinDesc   *bdesc = NULL;
    BrinRevmap *revmap;
    Buffer      buf = InvalidBuffer;
    MemoryContext tupcxt = NULL;
    MemoryContext oldcxt = NULL;

    revmap = brinRevmapInitialize(idxRel, &pagesPerRange);

    for (;;)
    {
        bool        need_insert = false;
        OffsetNumber off;
        BrinTuple  *brtup;
        BrinMemTuple *dtup;
        BlockNumber heapBlk;
        int         keyno;
#ifdef USE_ASSERT_CHECKING
        BrinTuple  *tmptup;
        BrinMemTuple *tmpdtup;
        Size        tmpsiz;
#endif

        CHECK_FOR_INTERRUPTS();

        heapBlk = ItemPointerGetBlockNumber(heaptid);
        /* normalize the block number to be the first block in the range */
        heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
        brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
                                         BUFFER_LOCK_SHARE);

        /* if range is unsummarized, there's nothing to do */
        if (!brtup)
            break;

        /* First time through? */
        if (bdesc == NULL)
        {
            bdesc = brin_build_desc(idxRel);
            tupcxt = AllocSetContextCreate(CurrentMemoryContext,
                                           "brininsert cxt",
                                           ALLOCSET_DEFAULT_MINSIZE,
                                           ALLOCSET_DEFAULT_INITSIZE,
                                           ALLOCSET_DEFAULT_MAXSIZE);
            oldcxt = MemoryContextSwitchTo(tupcxt);
        }

        dtup = brin_deform_tuple(bdesc, brtup);

#ifdef USE_ASSERT_CHECKING
        {
            /*
             * When assertions are enabled, we use this as an opportunity to
             * test the "union" method, which would otherwise be used very
             * rarely: first create a placeholder tuple, and addValue the
             * value we just got into it.  Then union the existing index
             * tuple with the updated placeholder tuple.  The tuple resulting
             * from that union should be identical to the one resulting from
             * the regular operation (straight addValue) below.
             *
             * Here we create the tuple to compare with; the actual
             * comparison is below.
             */
            tmptup = brin_form_placeholder_tuple(bdesc, heapBlk, &tmpsiz);
            tmpdtup = brin_deform_tuple(bdesc, tmptup);
            for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
            {
                BrinValues *bval;
                FmgrInfo   *addValue;

                bval = &tmpdtup->bt_columns[keyno];
                addValue = index_getprocinfo(idxRel, keyno + 1,
                                             BRIN_PROCNUM_ADDVALUE);
                FunctionCall4Coll(addValue,
                                  idxRel->rd_indcollation[keyno],
                                  PointerGetDatum(bdesc),
                                  PointerGetDatum(bval),
                                  values[keyno],
                                  nulls[keyno]);
            }

            union_tuples(bdesc, tmpdtup, brtup);

            tmpdtup->bt_placeholder = dtup->bt_placeholder;
            tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz);
        }
#endif

        /*
         * Compare the key values of the new tuple to the stored index
         * values; our deformed tuple will get updated if the new tuple
         * doesn't fit the original range (note this means we can't break out
         * of the loop early).  Make a note of whether this happens, so that
         * we know to insert the modified tuple later.
         */
        for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
        {
            Datum       result;
            BrinValues *bval;
            FmgrInfo   *addValue;

            bval = &dtup->bt_columns[keyno];
            addValue = index_getprocinfo(idxRel, keyno + 1,
                                         BRIN_PROCNUM_ADDVALUE);
            result = FunctionCall4Coll(addValue,
                                       idxRel->rd_indcollation[keyno],
                                       PointerGetDatum(bdesc),
                                       PointerGetDatum(bval),
                                       values[keyno],
                                       nulls[keyno]);
            /* if that returned true, we need to insert the updated tuple */
            need_insert |= DatumGetBool(result);
        }

#ifdef USE_ASSERT_CHECKING
        {
            /*
             * Now we can compare the tuple produced by the union function
             * with the one from plain addValue.
             */
            BrinTuple  *cmptup;
            Size        cmpsz;

            cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz);
            Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz));
        }
#endif

        if (!need_insert)
        {
            /*
             * The tuple is consistent with the new values, so there's
             * nothing to do.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }
        else
        {
            Page        page = BufferGetPage(buf);
            ItemId      lp = PageGetItemId(page, off);
            Size        origsz;
            BrinTuple  *origtup;
            Size        newsz;
            BrinTuple  *newtup;
            bool        samepage;

            /*
             * Make a copy of the old tuple, so that we can compare it after
             * re-acquiring the lock.
             */
            origsz = ItemIdGetLength(lp);
            origtup = brin_copy_tuple(brtup, origsz);

            /*
             * Before releasing the lock, check if we can attempt a same-page
             * update.  Another process could insert a tuple concurrently in
             * the same page though, so downstream we must be prepared to
             * cope if this turns out to not be possible after all.
             */
            newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
            samepage = brin_can_do_samepage_update(buf, origsz, newsz);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /*
             * Try to update the tuple.  If this doesn't work for whatever
             * reason, we need to restart from the top; the revmap might be
             * pointing at a different tuple for this block now, so we need
             * to recompute to ensure both our new heap tuple and the other
             * inserter's are covered by the combined tuple.  It might be
             * that we don't need to update at all.
             */
            if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
                               buf, off, origtup, origsz, newtup, newsz,
                               samepage))
            {
                /* no luck; start over */
                MemoryContextResetAndDeleteChildren(tupcxt);
                continue;
            }
        }

        /* success! */
        break;
    }

    brinRevmapTerminate(revmap);
    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
    if (bdesc != NULL)
    {
        brin_free_desc(bdesc);
        MemoryContextSwitchTo(oldcxt);
        MemoryContextDelete(tupcxt);
    }

    return BoolGetDatum(false);
}
/*
 * _bt_first() -- Find the first item in a scan.
 *
 *      We need to be clever about the type of scan, the operation it's
 *      performing, and the tree ordering.  We find the
 *      first item in the tree that satisfies the qualification
 *      associated with the scan descriptor.  On exit, the page containing
 *      the current index tuple is read locked and pinned, and the scan's
 *      opaque data entry is updated to include the buffer.
 */
bool
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    Buffer      buf;
    Page        page;
    BTStack     stack;
    OffsetNumber offnum;
    BTItem      btitem;
    IndexTuple  itup;
    ItemPointer current;
    BlockNumber blkno;
    StrategyNumber strat;
    bool        res;
    int32       result;
    bool        scanFromEnd;
    bool        continuescan;
    ScanKey     scankeys = NULL;
    int         keysCount = 0;
    int        *nKeyIs = NULL;
    int         i,
                j;
    StrategyNumber strat_total;

    /*
     * Order the scan keys in our canonical fashion and eliminate any
     * redundant keys.
     */
    _bt_orderkeys(scan);

    /*
     * Quit now if _bt_orderkeys() discovered that the scan keys can never
     * be satisfied (eg, x == 1 AND x > 2).
     */
    if (!so->qual_ok)
        return false;

    /*
     * Examine the scan keys to discover where we need to start the scan.
     */
    scanFromEnd = false;
    strat_total = BTEqualStrategyNumber;
    if (so->numberOfKeys > 0)
    {
        nKeyIs = (int *) palloc(so->numberOfKeys * sizeof(int));
        for (i = 0; i < so->numberOfKeys; i++)
        {
            AttrNumber  attno = so->keyData[i].sk_attno;

            /* ignore keys for already-determined attrs */
            if (attno <= keysCount)
                continue;
            /* if we didn't find a boundary for the preceding attr, quit */
            if (attno > keysCount + 1)
                break;
            strat = _bt_getstrat(rel, attno,
                                 so->keyData[i].sk_procedure);

            /*
             * Can we use this key as a starting boundary for this attr?
             *
             * We can use multiple keys if they look like, say, = >= = but we
             * have to stop after accepting a > or < boundary.
             */
            if (strat == strat_total ||
                strat == BTEqualStrategyNumber)
                nKeyIs[keysCount++] = i;
            else if (ScanDirectionIsBackward(dir) &&
                     (strat == BTLessStrategyNumber ||
                      strat == BTLessEqualStrategyNumber))
            {
                nKeyIs[keysCount++] = i;
                strat_total = strat;
                if (strat == BTLessStrategyNumber)
                    break;
            }
            else if (ScanDirectionIsForward(dir) &&
                     (strat == BTGreaterStrategyNumber ||
                      strat == BTGreaterEqualStrategyNumber))
            {
                nKeyIs[keysCount++] = i;
                strat_total = strat;
                if (strat == BTGreaterStrategyNumber)
                    break;
            }
        }
        if (keysCount == 0)
            scanFromEnd = true;
    }
    else
        scanFromEnd = true;

    /* if we just need to walk down one edge of the tree, do that */
    if (scanFromEnd)
    {
        if (nKeyIs)
            pfree(nKeyIs);
        return _bt_endpoint(scan, dir);
    }

    /*
     * We want to start the scan somewhere within the index.  Set up a
     * scankey we can use to search for the correct starting point.
     */
    scankeys = (ScanKey) palloc(keysCount * sizeof(ScanKeyData));
    for (i = 0; i < keysCount; i++)
    {
        FmgrInfo   *procinfo;

        j = nKeyIs[i];

        /*
         * _bt_orderkeys disallows it, but it's a place to add some code
         * later
         */
        if (so->keyData[j].sk_flags & SK_ISNULL)
        {
            pfree(nKeyIs);
            pfree(scankeys);
            elog(ERROR, "btree doesn't support is(not)null, yet");
            return false;
        }
        procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
        ScanKeyEntryInitializeWithInfo(scankeys + i,
                                       so->keyData[j].sk_flags,
                                       i + 1,
                                       procinfo,
                                       CurrentMemoryContext,
                                       so->keyData[j].sk_argument);
    }
    if (nKeyIs)
        pfree(nKeyIs);

    current = &(scan->currentItemData);

    /*
     * Use the manufactured scan key to descend the tree and position
     * ourselves on the target leaf page.
     */
    stack = _bt_search(rel, keysCount, scankeys, &buf, BT_READ);

    /* don't need to keep the stack around... */
    _bt_freestack(stack);

    if (!BufferIsValid(buf))
    {
        /* Only get here if index is completely empty */
        ItemPointerSetInvalid(current);
        so->btso_curbuf = InvalidBuffer;
        pfree(scankeys);
        return false;
    }

    /* remember which buffer we have pinned */
    so->btso_curbuf = buf;
    blkno = BufferGetBlockNumber(buf);
    page = BufferGetPage(buf);

    /* position to the precise item on the page */
    offnum = _bt_binsrch(rel, buf, keysCount, scankeys);

    ItemPointerSet(current, blkno, offnum);

    /*
     * At this point we are positioned at the first item >= scan key, or
     * possibly at the end of a page on which all the existing items are
     * less than the scan key and we know that everything on later pages
     * is greater than or equal to scan key.
     *
     * We could step forward in the latter case, but that'd be a waste of
     * time if we want to scan backwards.  So, it's now time to examine
     * the scan strategy to find the exact place to start the scan.
     *
     * Note: if _bt_step fails (meaning we fell off the end of the index in
     * one direction or the other), we either return false (no matches) or
     * call _bt_endpoint() to set up a scan starting at that index
     * endpoint, as appropriate for the desired scan type.
     *
     * it's yet another place to add some code later for is(not)null ...
     */
    switch (strat_total)
    {
        case BTLessStrategyNumber:

            /*
             * Back up one to arrive at last item < scankey
             */
            if (!_bt_step(scan, &buf, BackwardScanDirection))
            {
                pfree(scankeys);
                return false;
            }
            break;

        case BTLessEqualStrategyNumber:

            /*
             * We need to find the last item <= scankey, so step forward
             * till we find one > scankey, then step back one.
             */
            if (offnum > PageGetMaxOffsetNumber(page))
            {
                if (!_bt_step(scan, &buf, ForwardScanDirection))
                {
                    pfree(scankeys);
                    return _bt_endpoint(scan, dir);
                }
            }
            for (;;)
            {
                offnum = ItemPointerGetOffsetNumber(current);
                page = BufferGetPage(buf);
                result = _bt_compare(rel, keysCount, scankeys, page, offnum);
                if (result < 0)
                    break;
                if (!_bt_step(scan, &buf, ForwardScanDirection))
                {
                    pfree(scankeys);
                    return _bt_endpoint(scan, dir);
                }
            }
            if (!_bt_step(scan, &buf, BackwardScanDirection))
            {
                pfree(scankeys);
                return false;
            }
            break;

        case BTEqualStrategyNumber:

            /*
             * Make sure we are on the first equal item; might have to
             * step forward if currently at end of page.
             */
            if (offnum > PageGetMaxOffsetNumber(page))
            {
                if (!_bt_step(scan, &buf, ForwardScanDirection))
                {
                    pfree(scankeys);
                    return false;
                }
                offnum = ItemPointerGetOffsetNumber(current);
                page = BufferGetPage(buf);
            }
            result = _bt_compare(rel, keysCount, scankeys, page, offnum);
            if (result != 0)
                goto nomatches; /* no equal items! */

            /*
             * If a backward scan was specified, need to start with last
             * equal item not first one.
             */
            if (ScanDirectionIsBackward(dir))
            {
                do
                {
                    if (!_bt_step(scan, &buf, ForwardScanDirection))
                    {
                        pfree(scankeys);
                        return _bt_endpoint(scan, dir);
                    }
                    offnum = ItemPointerGetOffsetNumber(current);
                    page = BufferGetPage(buf);
                    result = _bt_compare(rel, keysCount, scankeys, page, offnum);
                } while (result == 0);
                if (!_bt_step(scan, &buf, BackwardScanDirection))
                    elog(ERROR, "equal items disappeared?");
            }
            break;

        case BTGreaterEqualStrategyNumber:

            /*
             * We want the first item >= scankey, which is where we are...
             * unless we're not anywhere at all...
             */
            if (offnum > PageGetMaxOffsetNumber(page))
            {
                if (!_bt_step(scan, &buf, ForwardScanDirection))
                {
                    pfree(scankeys);
                    return false;
                }
            }
            break;

        case BTGreaterStrategyNumber:

            /*
             * We want the first item > scankey, so make sure we are on an
             * item and then step over any equal items.
             */
            if (offnum > PageGetMaxOffsetNumber(page))
            {
                if (!_bt_step(scan, &buf, ForwardScanDirection))
                {
                    pfree(scankeys);
                    return false;
                }
                offnum = ItemPointerGetOffsetNumber(current);
                page = BufferGetPage(buf);
            }
            result = _bt_compare(rel, keysCount, scankeys, page, offnum);
            while (result == 0)
            {
                if (!_bt_step(scan, &buf, ForwardScanDirection))
                {
                    pfree(scankeys);
                    return false;
                }
                offnum = ItemPointerGetOffsetNumber(current);
                page = BufferGetPage(buf);
                result = _bt_compare(rel, keysCount, scankeys, page, offnum);
            }
            break;
    }

    /* okay, current item pointer for the scan is right */
    offnum = ItemPointerGetOffsetNumber(current);
    page = BufferGetPage(buf);
    btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
    itup = &btitem->bti_itup;

    /* is the first item actually acceptable? */
    if (_bt_checkkeys(scan, itup, dir, &continuescan))
    {
        /* yes, return it */
        scan->xs_ctup.t_self = itup->t_tid;
        res = true;
    }
    else if (continuescan)
    {
        /* no, but there might be another one that is */
        res = _bt_next(scan, dir);
    }
    else
    {
        /* no tuples in the index match this scan key */
nomatches:
        ItemPointerSetInvalid(current);
        so->btso_curbuf = InvalidBuffer;
        _bt_relbuf(rel, buf);
        res = false;
    }

    pfree(scankeys);

    return res;
}
/*
 * Fetch local cache of AM-specific info about the index, initializing it
 * if necessary
 */
SpGistCache *
spgGetCache(Relation index)
{
    SpGistCache *cache;

    if (index->rd_amcache == NULL)
    {
        Oid         atttype;
        spgConfigIn in;
        FmgrInfo   *procinfo;
        Buffer      metabuffer;
        SpGistMetaPageData *metadata;

        cache = MemoryContextAllocZero(index->rd_indexcxt,
                                       sizeof(SpGistCache));

        /* SPGiST doesn't support multi-column indexes */
        Assert(index->rd_att->natts == 1);

        /*
         * Get the actual data type of the indexed column from the index
         * tupdesc.  We pass this to the opclass config function so that
         * polymorphic opclasses are possible.
         */
        atttype = index->rd_att->attrs[0]->atttypid;

        /* Call the config function to get config info for the opclass */
        in.attType = atttype;

        procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC);
        FunctionCall2Coll(procinfo,
                          index->rd_indcollation[0],
                          PointerGetDatum(&in),
                          PointerGetDatum(&cache->config));

        /* Get the information we need about each relevant datatype */
        fillTypeDesc(&cache->attType, atttype);
        fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
        fillTypeDesc(&cache->attLabelType, cache->config.labelType);

        /* Last, get the lastUsedPages data from the metapage */
        metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
        LockBuffer(metabuffer, BUFFER_LOCK_SHARE);

        metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));

        if (metadata->magicNumber != SPGIST_MAGIC_NUMBER)
            elog(ERROR, "index \"%s\" is not an SP-GiST index",
                 RelationGetRelationName(index));

        cache->lastUsedPages = metadata->lastUsedPages;

        UnlockReleaseBuffer(metabuffer);

        index->rd_amcache = (void *) cache;
    }
    else
    {
        /* assume it's up to date */
        cache = (SpGistCache *) index->rd_amcache;
    }

    return cache;
}
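/*
 * Usage sketch (assumption): spgGetCache() parks its result in rd_amcache,
 * which the relcache owns, so callers just read from the returned struct
 * and must not free it; state-initialization code copies out what it needs.
 */
static void
example_spgist_cache(Relation index)
{
    SpGistCache *cache = spgGetCache(index);

    /* e.g. consult cache->config.prefixType or cache->lastUsedPages */
    (void) cache;
}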
/* * _bt_first() -- Find the first item in a scan. * * We need to be clever about the direction of scan, the search * conditions, and the tree ordering. We find the first item (or, * if backwards scan, the last item) in the tree that satisfies the * qualifications in the scan key. On success exit, the page containing * the current index tuple is pinned but not locked, and data about * the matching tuple(s) on the page has been loaded into so->currPos. * scan->xs_ctup.t_self is set to the heap TID of the current tuple, * and if requested, scan->xs_itup points to a copy of the index tuple. * * If there are no matching items in the index, we return FALSE, with no * pins or locks held. * * Note that scan->keyData[], and the so->keyData[] scankey built from it, * are both search-type scankeys (see nbtree/README for more about this). * Within this routine, we build a temporary insertion-type scankey to use * in locating the scan start position. */ bool _bt_first(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Buffer buf; BTStack stack; OffsetNumber offnum; StrategyNumber strat; bool nextkey; bool goback; ScanKey startKeys[INDEX_MAX_KEYS]; ScanKeyData scankeys[INDEX_MAX_KEYS]; ScanKeyData notnullkeys[INDEX_MAX_KEYS]; int keysCount = 0; int i; StrategyNumber strat_total; BTScanPosItem *currItem; pgstat_count_index_scan(rel); /* * Examine the scan keys and eliminate any redundant keys; also mark the * keys that must be matched to continue the scan. */ _bt_preprocess_keys(scan); /* * Quit now if _bt_preprocess_keys() discovered that the scan keys can * never be satisfied (eg, x == 1 AND x > 2). */ if (!so->qual_ok) return false; /*---------- * Examine the scan keys to discover where we need to start the scan. * * We want to identify the keys that can be used as starting boundaries; * these are =, >, or >= keys for a forward scan or =, <, <= keys for * a backwards scan. We can use keys for multiple attributes so long as * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept * a > or < boundary or find an attribute with no boundary (which can be * thought of as the same as "> -infinity"), we can't use keys for any * attributes to its right, because it would break our simplistic notion * of what initial positioning strategy to use. * * When the scan keys include cross-type operators, _bt_preprocess_keys * may not be able to eliminate redundant keys; in such cases we will * arbitrarily pick a usable one for each attribute. This is correct * but possibly not optimal behavior. (For example, with keys like * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when * x=5 would be more efficient.) Since the situation only arises given * a poorly-worded query plus an incomplete opfamily, live with it. * * When both equality and inequality keys appear for a single attribute * (again, only possible when cross-type operators appear), we *must* * select one of the equality keys for the starting point, because * _bt_checkkeys() will stop the scan as soon as an equality qual fails. * For example, if we have keys like "x >= 4 AND x = 10" and we elect to * start at x=4, we will fail and stop before reaching x=10. If multiple * equality quals survive preprocessing, however, it doesn't matter which * one we use --- by definition, they are either redundant or * contradictory. * * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier. 
* If the index stores nulls at the end of the index we'll be starting * from, and we have no boundary key for the column (which means the key * we deduced NOT NULL from is an inequality key that constrains the other * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to * use as a boundary key. If we didn't do this, we might find ourselves * traversing a lot of null entries at the start of the scan. * * In this loop, row-comparison keys are treated the same as keys on their * first (leftmost) columns. We'll add on lower-order columns of the row * comparison below, if possible. * * The selected scan keys (at most one per index column) are remembered by * storing their addresses into the local startKeys[] array. *---------- */ strat_total = BTEqualStrategyNumber; if (so->numberOfKeys > 0) { AttrNumber curattr; ScanKey chosen; ScanKey impliesNN; ScanKey cur; /* * chosen is the so-far-chosen key for the current attribute, if any. * We don't cast the decision in stone until we reach keys for the * next attribute. */ curattr = 1; chosen = NULL; /* Also remember any scankey that implies a NOT NULL constraint */ impliesNN = NULL; /* * Loop iterates from 0 to numberOfKeys inclusive; we use the last * pass to handle after-last-key processing. Actual exit from the * loop is at one of the "break" statements below. */ for (cur = so->keyData, i = 0;; cur++, i++) { if (i >= so->numberOfKeys || cur->sk_attno != curattr) { /* * Done looking at keys for curattr. If we didn't find a * usable boundary key, see if we can deduce a NOT NULL key. */ if (chosen == NULL && impliesNN != NULL && ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? ScanDirectionIsForward(dir) : ScanDirectionIsBackward(dir))) { /* Yes, so build the key in notnullkeys[keysCount] */ chosen = ¬nullkeys[keysCount]; ScanKeyEntryInitialize(chosen, (SK_SEARCHNOTNULL | SK_ISNULL | (impliesNN->sk_flags & (SK_BT_DESC | SK_BT_NULLS_FIRST))), curattr, ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? BTGreaterStrategyNumber : BTLessStrategyNumber), InvalidOid, InvalidOid, InvalidOid, (Datum) 0); } /* * If we still didn't find a usable boundary key, quit; else * save the boundary key pointer in startKeys. */ if (chosen == NULL) break; startKeys[keysCount++] = chosen; /* * Adjust strat_total, and quit if we have stored a > or < * key. */ strat = chosen->sk_strategy; if (strat != BTEqualStrategyNumber) { strat_total = strat; if (strat == BTGreaterStrategyNumber || strat == BTLessStrategyNumber) break; } /* * Done if that was the last attribute, or if next key is not * in sequence (implying no boundary key is available for the * next attribute). */ if (i >= so->numberOfKeys || cur->sk_attno != curattr + 1) break; /* * Reset for next attr. */ curattr = cur->sk_attno; chosen = NULL; impliesNN = NULL; } /* * Can we use this key as a starting boundary for this attr? * * If not, does it imply a NOT NULL constraint? (Because * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber, * *any* inequality key works for that; we need not test.) 
*/ switch (cur->sk_strategy) { case BTLessStrategyNumber: case BTLessEqualStrategyNumber: if (chosen == NULL) { if (ScanDirectionIsBackward(dir)) chosen = cur; else impliesNN = cur; } break; case BTEqualStrategyNumber: /* override any non-equality choice */ chosen = cur; break; case BTGreaterEqualStrategyNumber: case BTGreaterStrategyNumber: if (chosen == NULL) { if (ScanDirectionIsForward(dir)) chosen = cur; else impliesNN = cur; } break; } } } /* * If we found no usable boundary keys, we have to start from one end of * the tree. Walk down that edge to the first or last key, and scan from * there. */ if (keysCount == 0) return _bt_endpoint(scan, dir); /* * We want to start the scan somewhere within the index. Set up an * insertion scankey we can use to search for the boundary point we * identified above. The insertion scankey is built in the local * scankeys[] array, using the keys identified by startKeys[]. */ Assert(keysCount <= INDEX_MAX_KEYS); for (i = 0; i < keysCount; i++) { ScanKey cur = startKeys[i]; Assert(cur->sk_attno == i + 1); if (cur->sk_flags & SK_ROW_HEADER) { /* * Row comparison header: look to the first row member instead. * * The member scankeys are already in insertion format (ie, they * have sk_func = 3-way-comparison function), but we have to watch * out for nulls, which _bt_preprocess_keys didn't check. A null * in the first row member makes the condition unmatchable, just * like qual_ok = false. */ ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument); Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_flags & SK_ISNULL) return false; memcpy(scankeys + i, subkey, sizeof(ScanKeyData)); /* * If the row comparison is the last positioning key we accepted, * try to add additional keys from the lower-order row members. * (If we accepted independent conditions on additional index * columns, we use those instead --- doesn't seem worth trying to * determine which is more restrictive.) Note that this is OK * even if the row comparison is of ">" or "<" type, because the * condition applied to all but the last row member is effectively * ">=" or "<=", and so the extra keys don't break the positioning * scheme. But, by the same token, if we aren't able to use all * the row members, then the part of the row comparison that we * did use has to be treated as just a ">=" or "<=" condition, and * so we'd better adjust strat_total accordingly. */ if (i == keysCount - 1) { bool used_all_subkeys = false; Assert(!(subkey->sk_flags & SK_ROW_END)); for (;;) { subkey++; Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_attno != keysCount + 1) break; /* out-of-sequence, can't use it */ if (subkey->sk_strategy != cur->sk_strategy) break; /* wrong direction, can't use it */ if (subkey->sk_flags & SK_ISNULL) break; /* can't use null keys */ Assert(keysCount < INDEX_MAX_KEYS); memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData)); keysCount++; if (subkey->sk_flags & SK_ROW_END) { used_all_subkeys = true; break; } } if (!used_all_subkeys) { switch (strat_total) { case BTLessStrategyNumber: strat_total = BTLessEqualStrategyNumber; break; case BTGreaterStrategyNumber: strat_total = BTGreaterEqualStrategyNumber; break; } } break; /* done with outer loop */ } } else { /* * Ordinary comparison key. Transform the search-style scan key * to an insertion scan key by replacing the sk_func with the * appropriate btree comparison function. 
* * If scankey operator is not a cross-type comparison, we can use * the cached comparison function; otherwise gotta look it up in * the catalogs. (That can't lead to infinite recursion, since no * indexscan initiated by syscache lookup will use cross-data-type * operators.) * * We support the convention that sk_subtype == InvalidOid means * the opclass input type; this is a hack to simplify life for * ScanKeyInit(). */ if (cur->sk_subtype == rel->rd_opcintype[i] || cur->sk_subtype == InvalidOid) { FmgrInfo *procinfo; procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC); ScanKeyEntryInitializeWithInfo(scankeys + i, cur->sk_flags, cur->sk_attno, InvalidStrategy, cur->sk_subtype, cur->sk_collation, procinfo, cur->sk_argument); } else { RegProcedure cmp_proc; cmp_proc = get_opfamily_proc(rel->rd_opfamily[i], rel->rd_opcintype[i], cur->sk_subtype, BTORDER_PROC); if (!RegProcedureIsValid(cmp_proc)) elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"", BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype, cur->sk_attno, RelationGetRelationName(rel)); ScanKeyEntryInitialize(scankeys + i, cur->sk_flags, cur->sk_attno, InvalidStrategy, cur->sk_subtype, cur->sk_collation, cmp_proc, cur->sk_argument); } } } /*---------- * Examine the selected initial-positioning strategy to determine exactly * where we need to start the scan, and set flag variables to control the * code below. * * If nextkey = false, _bt_search and _bt_binsrch will locate the first * item >= scan key. If nextkey = true, they will locate the first * item > scan key. * * If goback = true, we will then step back one item, while if * goback = false, we will start the scan on the located item. *---------- */ switch (strat_total) { case BTLessStrategyNumber: /* * Find first item >= scankey, then back up one to arrive at last * item < scankey. (Note: this positioning strategy is only used * for a backward scan, so that is always the correct starting * position.) */ nextkey = false; goback = true; break; case BTLessEqualStrategyNumber: /* * Find first item > scankey, then back up one to arrive at last * item <= scankey. (Note: this positioning strategy is only used * for a backward scan, so that is always the correct starting * position.) */ nextkey = true; goback = true; break; case BTEqualStrategyNumber: /* * If a backward scan was specified, need to start with last equal * item not first one. */ if (ScanDirectionIsBackward(dir)) { /* * This is the same as the <= strategy. We will check at the * end whether the found item is actually =. */ nextkey = true; goback = true; } else { /* * This is the same as the >= strategy. We will check at the * end whether the found item is actually =. */ nextkey = false; goback = false; } break; case BTGreaterEqualStrategyNumber: /* * Find first item >= scankey. (This is only used for forward * scans.) */ nextkey = false; goback = false; break; case BTGreaterStrategyNumber: /* * Find first item > scankey. (This is only used for forward * scans.) */ nextkey = true; goback = false; break; default: /* can't get here, but keep compiler quiet */ elog(ERROR, "unrecognized strat_total: %d", (int) strat_total); return false; } /* * Use the manufactured insertion scan key to descend the tree and * position ourselves on the target leaf page. */ stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ); /* don't need to keep the stack around... 
    /* don't need to keep the stack around... */
    _bt_freestack(stack);

    /* remember which buffer we have pinned, if any */
    so->currPos.buf = buf;

    if (!BufferIsValid(buf))
    {
        /*
         * We only get here if the index is completely empty.  Lock relation
         * because nothing finer to lock exists.
         */
        PredicateLockRelation(rel, scan->xs_snapshot);
        return false;
    }
    else
        PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);

    /* initialize moreLeft/moreRight appropriately for scan direction */
    if (ScanDirectionIsForward(dir))
    {
        so->currPos.moreLeft = false;
        so->currPos.moreRight = true;
    }
    else
    {
        so->currPos.moreLeft = true;
        so->currPos.moreRight = false;
    }
    so->numKilled = 0;          /* just paranoia */
    so->markItemIndex = -1;     /* ditto */

    /* position to the precise item on the page */
    offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);

    /*
     * If nextkey = false, we are positioned at the first item >= scan key,
     * or possibly at the end of a page on which all the existing items are
     * less than the scan key and we know that everything on later pages is
     * greater than or equal to scan key.
     *
     * If nextkey = true, we are positioned at the first item > scan key, or
     * possibly at the end of a page on which all the existing items are
     * less than or equal to the scan key and we know that everything on
     * later pages is greater than scan key.
     *
     * The actually desired starting point is either this item or the prior
     * one, or in the end-of-page case it's the first item on the next page
     * or the last item on this page.  Adjust the starting offset if needed.
     * (If this results in an offset before the first item or after the last
     * one, _bt_readpage will report no items found, and then we'll step to
     * the next page as needed.)
     */
    if (goback)
        offnum = OffsetNumberPrev(offnum);

    /*
     * Now load data from the first page of the scan.
     */
    if (!_bt_readpage(scan, dir, offnum))
    {
        /*
         * There's no actually-matching data on this page.  Try to advance
         * to the next page.  Return false if there's no matching data at
         * all.
         */
        if (!_bt_steppage(scan, dir))
            return false;
    }

    /* Drop the lock, but not pin, on the current page */
    LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);

    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
    scan->xs_ctup.t_self = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);

    return true;
}
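/*
 * Positioning example for the strat_total logic above (hypothetical
 * single-column index containing 10, 20, 30, 40):
 *
 * "x <= 20", backward scan: nextkey = true, goback = true.  _bt_binsrch
 * locates the first item > 20 (the 30); stepping back one lands on 20, the
 * last item satisfying the qual, and the scan proceeds backward from there.
 *
 * "x > 20", forward scan: nextkey = true, goback = false.  _bt_binsrch
 * locates the first item > 20 (the 30) and the scan proceeds forward from
 * it directly.
 */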
/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If autosummarization is enabled, check if we need to summarize the
 * previous page range.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL
 * for it), there's nothing to do for this tuple.
 */
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
           ItemPointer heaptid, Relation heapRel,
           IndexUniqueCheck checkUnique,
           IndexInfo *indexInfo)
{
    BlockNumber pagesPerRange;
    BlockNumber origHeapBlk;
    BlockNumber heapBlk;
    BrinDesc   *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
    BrinRevmap *revmap;
    Buffer      buf = InvalidBuffer;
    MemoryContext tupcxt = NULL;
    MemoryContext oldcxt = CurrentMemoryContext;
    bool        autosummarize = BrinGetAutoSummarize(idxRel);

    revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);

    /*
     * origHeapBlk is the block number where the insertion occurred.  heapBlk
     * is the first block in the corresponding page range.
     */
    origHeapBlk = ItemPointerGetBlockNumber(heaptid);
    heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;

    for (;;)
    {
        bool        need_insert = false;
        OffsetNumber off;
        BrinTuple  *brtup;
        BrinMemTuple *dtup;
        int         keyno;

        CHECK_FOR_INTERRUPTS();

        /*
         * If auto-summarization is enabled and we just inserted the first
         * tuple into the first block of a new non-first page range, request
         * a summarization run of the previous range.
         */
        if (autosummarize &&
            heapBlk > 0 &&
            heapBlk == origHeapBlk &&
            ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber)
        {
            BlockNumber lastPageRange = heapBlk - 1;
            BrinTuple  *lastPageTuple;

            lastPageTuple =
                brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
                                         NULL, BUFFER_LOCK_SHARE, NULL);
            if (!lastPageTuple)
            {
                bool        recorded;

                recorded = AutoVacuumRequestWork(AVW_BRINSummarizeRange,
                                                 RelationGetRelid(idxRel),
                                                 lastPageRange);
                if (!recorded)
                    ereport(LOG,
                            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                             errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
                                    RelationGetRelationName(idxRel),
                                    lastPageRange)));
            }
            else
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
                                         NULL, BUFFER_LOCK_SHARE, NULL);

        /* if range is unsummarized, there's nothing to do */
        if (!brtup)
            break;

        /* First time through in this statement? */
        if (bdesc == NULL)
        {
            MemoryContextSwitchTo(indexInfo->ii_Context);
            bdesc = brin_build_desc(idxRel);
            indexInfo->ii_AmCache = (void *) bdesc;
            MemoryContextSwitchTo(oldcxt);
        }

        /* First time through in this brininsert call? */
        if (tupcxt == NULL)
        {
            tupcxt = AllocSetContextCreate(CurrentMemoryContext,
                                           "brininsert cxt",
                                           ALLOCSET_DEFAULT_SIZES);
            MemoryContextSwitchTo(tupcxt);
        }

        dtup = brin_deform_tuple(bdesc, brtup, NULL);

        /*
         * Compare the key values of the new tuple to the stored index
         * values; our deformed tuple will get updated if the new tuple
         * doesn't fit the original range (note this means we can't break
         * out of the loop early).  Make a note of whether this happens, so
         * that we know to insert the modified tuple later.
         */
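        /*
         * Each BRIN_PROCNUM_ADDVALUE support procedure is expected to widen
         * the summary in bval so that it covers values[keyno], returning
         * true only if it actually had to modify the summary.  For example,
         * a minmax-style summary of [10, 50] is unchanged by an incoming 30
         * (returns false), but becomes [10, 70] for an incoming 70 (returns
         * true).
         */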
        for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
        {
            Datum       result;
            BrinValues *bval;
            FmgrInfo   *addValue;

            bval = &dtup->bt_columns[keyno];
            addValue = index_getprocinfo(idxRel, keyno + 1,
                                         BRIN_PROCNUM_ADDVALUE);
            result = FunctionCall4Coll(addValue,
                                       idxRel->rd_indcollation[keyno],
                                       PointerGetDatum(bdesc),
                                       PointerGetDatum(bval),
                                       values[keyno],
                                       nulls[keyno]);
            /* if that returned true, we need to insert the updated tuple */
            need_insert |= DatumGetBool(result);
        }

        if (!need_insert)
        {
            /*
             * The tuple is consistent with the new values, so there's
             * nothing to do.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }
        else
        {
            Page        page = BufferGetPage(buf);
            ItemId      lp = PageGetItemId(page, off);
            Size        origsz;
            BrinTuple  *origtup;
            Size        newsz;
            BrinTuple  *newtup;
            bool        samepage;

            /*
             * Make a copy of the old tuple, so that we can compare it after
             * re-acquiring the lock.
             */
            origsz = ItemIdGetLength(lp);
            origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);

            /*
             * Before releasing the lock, check if we can attempt a same-page
             * update.  Another process could insert a tuple concurrently in
             * the same page though, so downstream we must be prepared to
             * cope if this turns out not to be possible after all.
             */
            newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
            samepage = brin_can_do_samepage_update(buf, origsz, newsz);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /*
             * Try to update the tuple.  If this doesn't work for whatever
             * reason, we need to restart from the top; the revmap might be
             * pointing at a different tuple for this block now, so we need
             * to recompute to ensure both our new heap tuple and the other
             * inserter's are covered by the combined tuple.  It might be
             * that we don't need to update at all.
             */
            if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
                               buf, off, origtup, origsz, newtup, newsz,
                               samepage))
            {
                /* no luck; start over */
                MemoryContextResetAndDeleteChildren(tupcxt);
                continue;
            }
        }

        /* success! */
        break;
    }

    brinRevmapTerminate(revmap);
    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
    MemoryContextSwitchTo(oldcxt);
    if (tupcxt != NULL)
        MemoryContextDelete(tupcxt);

    return false;
}
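/*
 * Range arithmetic example for brininsert above (hypothetical index with
 * pagesPerRange = 128): a heap tuple placed in block 300 gives
 * heapBlk = (300 / 128) * 128 = 256, so the summary consulted is the one
 * for the range starting at block 256.  The autosummarization request
 * fires only when the new tuple is the very first one of its range
 * (block 256, offset 1 here) and the previous range (blocks 128-255) is
 * itself unsummarized.
 */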