/* * Extract stored datum (and possible null category) from GIN tuple */ Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category) { Datum res; bool isnull; if (ginstate->oneCol) { /* * Single column index doesn't store attribute numbers in tuples */ res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc, &isnull); } else { /* * Since the datum type depends on which index column it's from, we * must be careful to use the right tuple descriptor here. */ OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple); res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber), ginstate->tupdesc[colN - 1], &isnull); } if (isnull) *category = GinGetNullCategory(tuple, ginstate); else *category = GIN_CAT_NORM_KEY; return res; }
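/*
 * Illustrative sketch, not part of the original source: a consumer fetching
 * both the key datum and its null category from a GIN entry-tree tuple via
 * gintuple_get_key() above.
 */
static void
example_inspect_entry(GinState *ginstate, IndexTuple itup)
{
	GinNullCategory category;
	Datum		key;

	key = gintuple_get_key(ginstate, itup, &category);
	if (category == GIN_CAT_NORM_KEY)
		elog(DEBUG1, "entry stores a normal key");
	else
		elog(DEBUG1, "entry stores null category %d", (int) category);
	(void) key;					/* key is only meaningful for normal entries */
}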
/* * Extract stored datum from GIN tuple */ Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple) { bool isnull; Datum res; if (ginstate->oneCol) { /* * Single column index doesn't store attribute numbers in tuples */ res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc, &isnull); } else { /* * Since the datum type depends on which index column it's from, we * must be careful to use the right tuple descriptor here. */ OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple); res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber), ginstate->tupdesc[colN - 1], &isnull); } Assert(!isnull); return res; }
static int
compare_indextuple(const IndexTuple itup1, const IndexTuple itup2,
				   ScanKey entry, int keysz, TupleDesc tupdes, bool *hasnull)
{
	int			i;
	int32		compare;

	*hasnull = false;
	for (i = 1; i <= keysz; i++, entry++)
	{
		Datum		attrDatum1,
					attrDatum2;
		bool		isNull1,
					isNull2;

		attrDatum1 = index_getattr(itup1, i, tupdes, &isNull1);
		attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
		if (isNull1)
		{
			*hasnull = true;
			if (isNull2)
				compare = 0;	/* NULL "=" NULL */
			else if (entry->sk_flags & SK_BT_NULLS_FIRST)
				compare = -1;	/* NULL "<" NOT_NULL */
			else
				compare = 1;	/* NULL ">" NOT_NULL */
		}
		else if (isNull2)
		{
			*hasnull = true;
			if (entry->sk_flags & SK_BT_NULLS_FIRST)
				compare = 1;	/* NOT_NULL ">" NULL */
			else
				compare = -1;	/* NOT_NULL "<" NULL */
		}
		else
		{
			compare =
#if PG_VERSION_NUM >= 90100
				DatumGetInt32(FunctionCall2Coll(&entry->sk_func,
												entry->sk_collation,
												attrDatum1, attrDatum2));
#else
				DatumGetInt32(FunctionCall2(&entry->sk_func,
											attrDatum1, attrDatum2));
#endif
			if (entry->sk_flags & SK_BT_DESC)
				compare = -compare;
		}
		if (compare != 0)
			return compare;
	}

	return 0;
}
/* * Extract attribute (column) number of stored entry from GIN tuple */ OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple) { OffsetNumber colN; if (ginstate->oneCol) { /* column number is not stored explicitly */ colN = FirstOffsetNumber; } else { Datum res; bool isnull; /* * First attribute is always int16, so we can safely use any tuple * descriptor to obtain first attribute of tuple */ res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0], &isnull); Assert(!isnull); colN = DatumGetUInt16(res); Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts); } return colN; }
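/*
 * Illustrative sketch, not part of the original source: choosing the tuple
 * descriptor that matches the column an entry belongs to, mirroring the
 * single-column vs. multi-column branches in gintuple_get_key().
 */
static TupleDesc
example_tupdesc_for_entry(GinState *ginstate, IndexTuple itup)
{
	OffsetNumber attnum = gintuple_get_attrnum(ginstate, itup);

	/* a single-column index keeps the original descriptor */
	if (ginstate->oneCol)
		return ginstate->origTupdesc;

	/* otherwise each column has its own (attnum, key) descriptor */
	return ginstate->tupdesc[attnum - 1];
}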
/*
 * Write itup vector to page; does no checking of free space.
 */
static OffsetNumber
gistwritebuffer(Relation r, Page page, IndexTuple *itup,
				int len, OffsetNumber off)
{
	OffsetNumber l = InvalidOffsetNumber;
	int			i;

#ifdef GIST_PAGEADDITEM
	GISTENTRY	tmpdentry;
	IndexTuple	newtup;
	bool		IsNull;
#endif
	for (i = 0; i < len; i++)
	{
#ifdef GIST_PAGEADDITEM
		/*
		 * NB: this branch has bitrotted; it references "giststate", which is
		 * not in scope in this function, so it no longer compiles as-is.
		 */
		l = gistPageAddItem(giststate, r, page,
							(Item) itup[i], IndexTupleSize(itup[i]),
							off, LP_USED, &tmpdentry, &newtup);
		off = OffsetNumberNext(off);
		if (DatumGetPointer(tmpdentry.key) != NULL &&
			tmpdentry.key != index_getattr(itup[i], 1, r->rd_att, &IsNull))
			pfree(DatumGetPointer(tmpdentry.key));
		if (itup[i] != newtup)
			pfree(newtup);
#else
		l = PageAddItem(page, (Item) itup[i], IndexTupleSize(itup[i]),
						off, LP_USED);
		if (l == InvalidOffsetNumber)
			elog(ERROR, "failed to add index item to \"%s\"",
				 RelationGetRelationName(r));
#endif
	}
	return l;
}
/* * Make unions of keys in IndexTuple vector (one union datum per index column). * Union Datums are returned into the attr/isnull arrays. * Resulting Datums aren't compressed. */ void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, Datum *attr, bool *isnull) { int i; GistEntryVector *evec; int attrsize; evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ); for (i = 0; i < giststate->tupdesc->natts; i++) { int j; /* Collect non-null datums for this column */ evec->n = 0; for (j = 0; j < len; j++) { Datum datum; bool IsNull; datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull); if (IsNull) continue; gistdentryinit(giststate, i, evec->vector + evec->n, datum, NULL, NULL, (OffsetNumber) 0, false, IsNull); evec->n++; } /* If this column was all NULLs, the union is NULL */ if (evec->n == 0) { attr[i] = (Datum) 0; isnull[i] = true; } else { if (evec->n == 1) { /* unionFn may expect at least two inputs */ evec->n = 2; evec->vector[1] = evec->vector[0]; } /* Make union and store in attr array */ attr[i] = FunctionCall2Coll(&giststate->unionFn[i], giststate->supportCollation[i], PointerGetDatum(evec), PointerGetDatum(&attrsize)); isnull[i] = false; } } }
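/*
 * Illustrative sketch, not part of the original source: one way a page-union
 * path might use gistMakeUnionItVec(), forming a downlink tuple from the
 * per-column union results.  The gistFormTuple() call and its isleaf=false
 * argument are assumptions based on the 9.x GiST API.
 */
static IndexTuple
example_union_tuple(GISTSTATE *giststate, Relation r, IndexTuple *itvec, int len)
{
	Datum		attr[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];

	/* one union datum per index column lands in attr[]/isnull[] */
	gistMakeUnionItVec(giststate, itvec, len, attr, isnull);

	/* compress the union values and build the downlink tuple */
	return gistFormTuple(giststate, r, attr, isnull, false);
}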
static void readindextuple(readindexinfo *info, Relation irel, Relation hrel, Datum *values, bool *nulls) { BlockNumber blkno; Page page; OffsetNumber offnum; IndexTuple itup; HeapTupleData htup; Buffer hbuf; AttrNumber attno; TupleDesc tupdesc = RelationGetDescr(irel); blkno = info->blkno; page = info->page; offnum = info->offnum; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); htup.t_self = itup->t_tid; values[1] = ItemPointerGetDatum(&itup->t_tid); if (hrel == NULL) values[2] = PointerGetDatum(cstring_to_text(AOTupleIdToString((AOTupleId *)&itup->t_tid))); else values[2] = PointerGetDatum(cstring_to_text("N/A")); values[3] = PointerGetDatum(istatus_text(PageGetItemId(page, offnum))); if (hrel != NULL) { if (heap_fetch(hrel, SnapshotAny, &htup, &hbuf, true, NULL)) values[4] = PointerGetDatum(hstatus_text(htup.t_data, true)); else if (htup.t_data) values[4] = PointerGetDatum(hstatus_text(htup.t_data, false)); else values[4] = PointerGetDatum(cstring_to_text("NOT_FOUND")); ReleaseBuffer(hbuf); } else values[4] = PointerGetDatum(cstring_to_text("N/A")); for (attno = 1; attno <= tupdesc->natts; attno++) { bool isnull; values[FIXED_COLUMN+attno-1] = index_getattr(itup, attno, tupdesc, &isnull); nulls[FIXED_COLUMN+attno-1] = isnull; } }
/* * Convert an index tuple into Datum/isnull arrays. * * The caller must allocate sufficient storage for the output arrays. * (INDEX_MAX_KEYS entries should be enough.) */ void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull) { int i; /* Assert to protect callers who allocate fixed-size arrays */ Assert(tupleDescriptor->natts <= INDEX_MAX_KEYS); for (i = 0; i < tupleDescriptor->natts; i++) { values[i] = index_getattr(tup, i + 1, tupleDescriptor, &isnull[i]); } }
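/*
 * Illustrative sketch, not part of the original source: deforming an index
 * tuple into INDEX_MAX_KEYS-sized arrays, per the header comment above.
 */
static void
example_deform(IndexTuple itup, TupleDesc itupdesc)
{
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	int			i;

	index_deform_tuple(itup, itupdesc, values, isnull);
	for (i = 0; i < itupdesc->natts; i++)
		elog(DEBUG1, "attribute %d is %s", i + 1,
			 isnull[i] ? "NULL" : "non-NULL");
}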
/* * Decompress all keys in tuple */ void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, OffsetNumber o, GISTENTRY *attdata, bool *isnull) { int i; for (i = 0; i < r->rd_att->natts; i++) { Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); gistdentryinit(giststate, i, &attdata[i], datum, r, p, o, FALSE, isnull[i]); } }
/* * Convert an index tuple into Datum/isnull arrays. * * The caller must allocate sufficient storage for the output arrays. * (INDEX_MAX_KEYS entries should be enough.) */ void index_deform_tuple( struct index_tuple* tup, struct tuple *tuple, datum_t* values, bool* isnull) { int i; /* ASSERT to protect callers who allocate fixed-size arrays */ ASSERT(tuple->natts <= INDEX_MAX_KEYS); for (i = 0; i < tuple->natts; i++) { values[i] = index_getattr(tup, i + 1, tuple, &isnull[i]); } }
/* * Fetch all keys in tuple. * Returns a new HeapTuple containing the originally-indexed data. */ HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple) { MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt); Datum fetchatt[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; int i; for (i = 0; i < r->rd_att->natts; i++) { Datum datum; datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); if (giststate->fetchFn[i].fn_oid != InvalidOid) { if (!isnull[i]) fetchatt[i] = gistFetchAtt(giststate, i, datum, r); else fetchatt[i] = (Datum) 0; } else if (giststate->compressFn[i].fn_oid == InvalidOid) { /* * If opclass does not provide compress method that could change * original value, att is necessarily stored in original form. */ if (!isnull[i]) fetchatt[i] = datum; else fetchatt[i] = (Datum) 0; } else { /* * Index-only scans not supported for this column. Since the * planner chose an index-only scan anyway, it is not interested * in this column, and we can replace it with a NULL. */ isnull[i] = true; fetchatt[i] = (Datum) 0; } } MemoryContextSwitchTo(oldcxt); return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull); }
/* * _hash_checkqual -- does the index tuple satisfy the scan conditions? */ bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup) { /* * Currently, we can't check any of the scan conditions since we do not * have the original index entry value to supply to the sk_func. Always * return true; we expect that hashgettuple already set the recheck flag * to make the main indexscan code do it. */ #ifdef NOT_USED TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); ScanKey key = scan->keyData; int scanKeySize = scan->numberOfKeys; while (scanKeySize > 0) { Datum datum; bool isNull; Datum test; datum = index_getattr(itup, key->sk_attno, tupdesc, &isNull); /* assume sk_func is strict */ if (isNull) return false; if (key->sk_flags & SK_ISNULL) return false; test = FunctionCall2Coll(&key->sk_func, key->sk_collation, datum, key->sk_argument); if (!DatumGetBool(test)) return false; key++; scanKeySize--; } #endif return true; }
/* * Take a compressed entry, and install it on a page. Since we now know * where the entry will live, we decompress it and recompress it using * that knowledge (some compression routines may want to fish around * on the page, for example, or do something special for leaf nodes.) */ static OffsetNumber gistPageAddItem(GISTSTATE *giststate, Relation r, Page page, Item item, Size size, OffsetNumber offsetNumber, ItemIdFlags flags, GISTENTRY *dentry, IndexTuple *newtup) { GISTENTRY tmpcentry; IndexTuple itup = (IndexTuple) item; OffsetNumber retval; Datum datum; bool IsNull; /* * recompress the item given that we now know the exact page and * offset for insertion */ datum = index_getattr(itup, 1, r->rd_att, &IsNull); gistdentryinit(giststate, 0, dentry, datum, (Relation) 0, (Page) 0, (OffsetNumber) InvalidOffsetNumber, ATTSIZE(datum, r, 1, IsNull), FALSE, IsNull); gistcentryinit(giststate, 0, &tmpcentry, dentry->key, r, page, offsetNumber, dentry->bytes, FALSE); *newtup = gist_tuple_replacekey(r, tmpcentry, itup); retval = PageAddItem(page, (Item) *newtup, IndexTupleSize(*newtup), offsetNumber, flags); if (retval == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(r)); /* be tidy */ if (DatumGetPointer(tmpcentry.key) != NULL && tmpcentry.key != dentry->key && tmpcentry.key != datum) pfree(DatumGetPointer(tmpcentry.key)); return (retval); }
/* * _bt_mkscankey * Build an insertion scan key that contains comparison data from itup * as well as comparator routines appropriate to the key datatypes. * * The result is intended for use with _bt_compare(). */ ScanKey _bt_mkscankey(Relation rel, IndexTuple itup) { ScanKey skey; TupleDesc itupdesc; int natts; int16 *indoption; int i; itupdesc = RelationGetDescr(rel); natts = RelationGetNumberOfAttributes(rel); indoption = rel->rd_indoption; skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); for (i = 0; i < natts; i++) { FmgrInfo *procinfo; Datum arg; bool null; int flags; /* * We can use the cached (default) support procs since no cross-type * comparison can be needed. */ procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC); arg = index_getattr(itup, i + 1, itupdesc, &null); flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT); ScanKeyEntryInitializeWithInfo(&skey[i], flags, (AttrNumber) (i + 1), InvalidStrategy, InvalidOid, rel->rd_indcollation[i], procinfo, arg); } return skey; }
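/*
 * Illustrative sketch, not part of the original source: the usual lifecycle
 * of an insertion scan key built by _bt_mkscankey(); _bt_freeskey() releases
 * the palloc'd array once the caller is done searching.
 */
static void
example_scankey_lifecycle(Relation rel, IndexTuple itup)
{
	ScanKey		skey = _bt_mkscankey(rel, itup);

	/* ... use skey with _bt_search()/_bt_compare() here ... */

	_bt_freeskey(skey);
}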
/* ** Given an IndexTuple to be inserted on a page, this routine replaces ** the key with another key, which may involve generating a new IndexTuple ** if the sizes don't match or if the null status changes. ** ** XXX this only works for a single-column index tuple! */ static IndexTuple gist_tuple_replacekey(Relation r, GISTENTRY entry, IndexTuple t) { bool IsNull; Datum datum = index_getattr(t, 1, r->rd_att, &IsNull); /* * If new entry fits in index tuple, copy it in. To avoid worrying * about null-value bitmask, pass it off to the general * index_formtuple routine if either the previous or new value is * NULL. */ if (!IsNull && DatumGetPointer(entry.key) != NULL && (Size) entry.bytes <= ATTSIZE(datum, r, 1, IsNull)) { memcpy(DatumGetPointer(datum), DatumGetPointer(entry.key), entry.bytes); /* clear out old size */ t->t_info &= ~INDEX_SIZE_MASK; /* or in new size */ t->t_info |= MAXALIGN(entry.bytes + sizeof(IndexTupleData)); return t; } else { /* generate a new index tuple for the compressed entry */ TupleDesc tupDesc = r->rd_att; IndexTuple newtup; char isnull; isnull = DatumGetPointer(entry.key) != NULL ? ' ' : 'n'; newtup = (IndexTuple) index_formtuple(tupDesc, &(entry.key), &isnull); newtup->t_tid = t->t_tid; return newtup; } }
/* * StoreIndexTuple * Fill the slot with data from the index tuple. * * At some point this might be generally-useful functionality, but * right now we don't need it elsewhere. */ static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc) { int nindexatts = itupdesc->natts; Datum *values = slot->tts_values; bool *isnull = slot->tts_isnull; int i; /* * Note: we must use the tupdesc supplied by the AM in index_getattr, not * the slot's tupdesc, in case the latter has different datatypes (this * happens for btree name_ops in particular). They'd better have the same * number of columns though, as well as being datatype-compatible which is * something we can't so easily check. */ Assert(slot->tts_tupleDescriptor->natts == nindexatts); ExecClearTuple(slot); for (i = 0; i < nindexatts; i++) values[i] = index_getattr(itup, i + 1, itupdesc, &isnull[i]); ExecStoreVirtualTuple(slot); }
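/*
 * Illustrative sketch, not part of the original source: how an index-only
 * scan might hand back a tuple via StoreIndexTuple().  The xs_itup and
 * xs_itupdesc fields are assumed to have been filled in by the index AM.
 */
static void
example_return_index_tuple(IndexScanDesc scandesc, TupleTableSlot *slot)
{
	/* if the AM returned the index tuple form, convert it into the slot */
	if (scandesc->xs_itup != NULL)
		StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
	else
		ExecClearTuple(slot);
}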
/* * bt_mk_scankey * Build an insertion scan key that contains comparison data from itup * as well as comparator routines appropriate to the key datatypes. * * The result is intended for use with bt_compare(). */ struct scankey *bt_mk_scankey(struct relation* rel, struct index_tuple* itup) { struct scankey *skey; struct tuple *itupdesc; int natts; int16 *indoption; int i; itupdesc = REL_DESC(rel); natts = REL_GET_NR_ATTR(rel); indoption = rel->rd_indoption; skey = (struct scankey *)palloc(natts * sizeof(struct scankey)); for (i = 0; i < natts; i++) { struct fmgr_info *procinfo; datum_t arg; bool null; int flags; /* * We can use the cached (default) support procs since no cross-type * comparison can be needed. */ procinfo = index_getprocinfo(rel, i + 1, BT_ORDER_PROC); arg = index_getattr(itup, i + 1, itupdesc, &null); flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDEX_OPT_SHIFT); scankey_init_info(&skey[i], flags, (attr_nr_t)(i + 1), INVALID_STRAT, INVALID_OID, rel->rd_indcollation[i], procinfo, arg); } return skey; }
/*
 * Add an array of item pointers to the tuple's posting list, or, if there is
 * not enough space, create a posting tree and make the tuple point to it
 * instead.  The maximum tuple size is determined in GinFormTuple().
 */
static IndexTuple
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
					   IndexTuple old, ItemPointerData *items, uint32 nitem,
					   bool isBuild)
{
	bool		isnull;
	Datum		key = index_getattr(old, FirstOffsetNumber,
									ginstate->tupdesc, &isnull);
	IndexTuple	res = GinFormTuple(ginstate, key, NULL,
								   nitem + GinGetNPosting(old));

	if (res)
	{
		/* good, small enough */
		MergeItemPointers(GinGetPosting(res),
						  GinGetPosting(old), GinGetNPosting(old),
						  items, nitem);
		GinSetNPosting(res, nitem + GinGetNPosting(old));
	}
	else
	{
		BlockNumber postingRoot;
		GinPostingTreeScan *gdi;

		/* posting list becomes too big, so convert it to a posting tree */
		res = GinFormTuple(ginstate, key, NULL, 0);
		postingRoot = createPostingTree(index, GinGetPosting(old),
										GinGetNPosting(old));
		GinSetPostingTree(res, postingRoot);

		gdi = prepareScanPostingTree(index, postingRoot, FALSE);
		gdi->btree.isBuild = isBuild;

		insertItemPointer(gdi, items, nitem);

		pfree(gdi);
	}

	return res;
}
/* * _hash_checkqual -- does the index tuple satisfy the scan conditions? */ bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup) { TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); ScanKey key = scan->keyData; int scanKeySize = scan->numberOfKeys; IncrIndexProcessed(); while (scanKeySize > 0) { Datum datum; bool isNull; Datum test; datum = index_getattr(itup, key->sk_attno, tupdesc, &isNull); /* assume sk_func is strict */ if (isNull) return false; if (key->sk_flags & SK_ISNULL) return false; test = FunctionCall2(&key->sk_func, datum, key->sk_argument); if (!DatumGetBool(test)) return false; key++; scanKeySize--; } return true; }
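/*
 * Illustrative sketch, not part of the original source: the shape of a
 * caller that applies _hash_checkqual() to the item at the current scan
 * position before returning it.
 */
static bool
example_item_matches(IndexScanDesc scan, Page page, OffsetNumber offnum)
{
	IndexTuple	itup;

	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
	return _hash_checkqual(scan, itup);
}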
static void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, OffsetNumber o, GISTENTRY attdata[], bool decompvec[], bool isnull[]) { int i; Datum datum; for (i = 0; i < r->rd_att->natts; i++) { datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); gistdentryinit(giststate, i, &attdata[i], datum, r, p, o, ATTSIZE(datum, giststate->tupdesc, i + 1, isnull[i]), FALSE, isnull[i]); if (isAttByVal(giststate, i)) decompvec[i] = FALSE; else { if (attdata[i].key == datum || isnull[i]) decompvec[i] = FALSE; else decompvec[i] = TRUE; } } }
/*
 * For a newly inserted heap tid, check if an entry with this tid
 * already exists in a unique index.  If it does, abort the inserting
 * transaction.
 */
static void
_bt_validate_tid(Relation irel, ItemPointer h_tid)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	BlockNumber blkno;
	BlockNumber num_pages;
	Buffer		buf;
	Page		page;
	BTPageOpaque opaque;
	IndexTuple	itup;
	OffsetNumber maxoff,
				minoff,
				offnum;

	elog(DEBUG1, "validating tid (%d,%d) for index (%s)",
		 ItemPointerGetBlockNumber(h_tid), ItemPointerGetOffsetNumber(h_tid),
		 RelationGetRelationName(irel));

	blkno = BTREE_METAPAGE + 1;
	num_pages = RelationGetNumberOfBlocks(irel);

	MIRROREDLOCK_BUFMGR_LOCK;

	for (; blkno < num_pages; blkno++)
	{
		buf = ReadBuffer(irel, blkno);
		page = BufferGetPage(buf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		if (!PageIsNew(page))
			_bt_checkpage(irel, buf);
		if (P_ISLEAF(opaque))
		{
			minoff = P_FIRSTDATAKEY(opaque);
			maxoff = PageGetMaxOffsetNumber(page);
			for (offnum = minoff;
				 offnum <= maxoff;
				 offnum = OffsetNumberNext(offnum))
			{
				itup = (IndexTuple) PageGetItem(page,
												PageGetItemId(page, offnum));
				if (ItemPointerEquals(&itup->t_tid, h_tid))
				{
					Form_pg_attribute key_att = RelationGetDescr(irel)->attrs[0];
					Oid			key = InvalidOid;
					bool		isnull;

					if (key_att->atttypid == OIDOID)
					{
						/* use the Oid-specific Datum conversion macro */
						key = DatumGetObjectId(
							index_getattr(itup, 1, RelationGetDescr(irel), &isnull));
						elog(ERROR, "found tid (%d,%d), %s (%u) already in index (%s)",
							 ItemPointerGetBlockNumber(h_tid),
							 ItemPointerGetOffsetNumber(h_tid),
							 NameStr(key_att->attname), key,
							 RelationGetRelationName(irel));
					}
					else
					{
						elog(ERROR, "found tid (%d,%d) already in index (%s)",
							 ItemPointerGetBlockNumber(h_tid),
							 ItemPointerGetOffsetNumber(h_tid),
							 RelationGetRelationName(irel));
					}
				}
			}
		}
		ReleaseBuffer(buf);
	}

	MIRROREDLOCK_BUFMGR_UNLOCK;
}
/*
 * Post vacuum, iterate over all entries in index, check if the h_tid
 * of each entry exists and is not dead.  For specific system tables,
 * also ensure that the key in index entry matches the corresponding
 * attribute in the heap tuple.
 */
void
_bt_validate_vacuum(Relation irel, Relation hrel, TransactionId oldest_xmin)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	BlockNumber blkno;
	BlockNumber num_pages;
	Buffer		ibuf = InvalidBuffer;
	Buffer		hbuf = InvalidBuffer;
	Page		ipage;
	BTPageOpaque opaque;
	IndexTuple	itup;
	HeapTupleData htup;
	OffsetNumber maxoff,
				minoff,
				offnum;
	Oid			ioid,
				hoid;
	bool		isnull;

	blkno = BTREE_METAPAGE + 1;
	num_pages = RelationGetNumberOfBlocks(irel);

	elog(LOG, "btvalidatevacuum: index %s, heap %s",
		 RelationGetRelationName(irel), RelationGetRelationName(hrel));

	MIRROREDLOCK_BUFMGR_LOCK;

	for (; blkno < num_pages; blkno++)
	{
		ibuf = ReadBuffer(irel, blkno);
		ipage = BufferGetPage(ibuf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(ipage);
		if (!PageIsNew(ipage))
			_bt_checkpage(irel, ibuf);
		if (P_ISLEAF(opaque))
		{
			minoff = P_FIRSTDATAKEY(opaque);
			maxoff = PageGetMaxOffsetNumber(ipage);
			for (offnum = minoff;
				 offnum <= maxoff;
				 offnum = OffsetNumberNext(offnum))
			{
				itup = (IndexTuple) PageGetItem(ipage,
												PageGetItemId(ipage, offnum));
				ItemPointerCopy(&itup->t_tid, &htup.t_self);

				/*
				 * TODO: construct a tid bitmap based on index tids
				 * and fetch heap tids in order afterwards.  That will
				 * also allow validating if a heap tid appears twice
				 * in a unique index.
				 */
				if (!heap_release_fetch(hrel, SnapshotAny, &htup,
										&hbuf, true, NULL))
				{
					elog(ERROR, "btvalidatevacuum: tid (%d,%d) from index %s "
						 "not found in heap %s",
						 ItemPointerGetBlockNumber(&itup->t_tid),
						 ItemPointerGetOffsetNumber(&itup->t_tid),
						 RelationGetRelationName(irel),
						 RelationGetRelationName(hrel));
				}
				switch (HeapTupleSatisfiesVacuum(hrel, htup.t_data,
												 oldest_xmin, hbuf))
				{
					case HEAPTUPLE_RECENTLY_DEAD:
					case HEAPTUPLE_LIVE:
					case HEAPTUPLE_INSERT_IN_PROGRESS:
					case HEAPTUPLE_DELETE_IN_PROGRESS:
						/* these tuples are considered alive by vacuum */
						break;
					case HEAPTUPLE_DEAD:
						elog(ERROR, "btvalidatevacuum: vacuum did not remove "
							 "dead tuple (%d,%d) from heap %s and index %s",
							 ItemPointerGetBlockNumber(&itup->t_tid),
							 ItemPointerGetOffsetNumber(&itup->t_tid),
							 RelationGetRelationName(hrel),
							 RelationGetRelationName(irel));
						break;
					default:
						elog(ERROR, "btvalidatevacuum: invalid visibility");
						break;
				}
				switch (RelationGetRelid(irel))
				{
					case DatabaseOidIndexId:
					case TypeOidIndexId:
					case ClassOidIndexId:
					case ConstraintOidIndexId:
						hoid = HeapTupleGetOid(&htup);
						/* convert the index datum with the Oid-specific macro */
						ioid = DatumGetObjectId(
							index_getattr(itup, 1, RelationGetDescr(irel), &isnull));
						if (hoid != ioid)
						{
							elog(ERROR,
								 "btvalidatevacuum: index oid(%u) != heap oid(%u)"
								 " tuple (%d,%d) index %s",
								 ioid, hoid,
								 ItemPointerGetBlockNumber(&itup->t_tid),
								 ItemPointerGetOffsetNumber(&itup->t_tid),
								 RelationGetRelationName(irel));
						}
						break;
					case GpRelationNodeOidIndexId:
						hoid = DatumGetObjectId(
							heap_getattr(&htup, 1, RelationGetDescr(hrel), &isnull));
						ioid = DatumGetObjectId(
							index_getattr(itup, 1, RelationGetDescr(irel), &isnull));
						if (hoid != ioid)
						{
							elog(ERROR,
								 "btvalidatevacuum: index oid(%u) != heap oid(%u)"
								 " tuple (%d,%d) index %s",
								 ioid, hoid,
								 ItemPointerGetBlockNumber(&itup->t_tid),
								 ItemPointerGetOffsetNumber(&itup->t_tid),
								 RelationGetRelationName(irel));
						}
						{
							int4		hsegno = DatumGetInt32(
								heap_getattr(&htup, 2, RelationGetDescr(hrel), &isnull));
							int4		isegno = DatumGetInt32(
								index_getattr(itup, 2, RelationGetDescr(irel), &isnull));

							if (isegno != hsegno)
							{
								elog(ERROR,
									 "btvalidatevacuum: index segno(%d) != heap segno(%d)"
									 " tuple (%d,%d) index %s",
									 isegno, hsegno,
									 ItemPointerGetBlockNumber(&itup->t_tid),
									 ItemPointerGetOffsetNumber(&itup->t_tid),
									 RelationGetRelationName(irel));
							}
						}
						break;
					default:
						break;
				}
				if (RelationGetNamespace(irel) == PG_AOSEGMENT_NAMESPACE)
				{
					int4		isegno = DatumGetInt32(
						index_getattr(itup, 1, RelationGetDescr(irel), &isnull));
					int4		hsegno = DatumGetInt32(
						heap_getattr(&htup, 1, RelationGetDescr(hrel), &isnull));

					if (isegno != hsegno)
					{
						elog(ERROR,
							 "btvalidatevacuum: index segno(%d) != heap segno(%d)"
							 " tuple (%d,%d) index %s",
							 isegno, hsegno,
							 ItemPointerGetBlockNumber(&itup->t_tid),
							 ItemPointerGetOffsetNumber(&itup->t_tid),
							 RelationGetRelationName(irel));
					}
				}
			}
		}
		if (BufferIsValid(ibuf))
			ReleaseBuffer(ibuf);
	}
	if (BufferIsValid(hbuf))
		ReleaseBuffer(hbuf);

	MIRROREDLOCK_BUFMGR_UNLOCK;
}
/*---------- * _bt_compare() -- Compare scankey to a particular tuple on the page. * * The passed scankey must be an insertion-type scankey (see nbtree/README), * but it can omit the rightmost column(s) of the index. * * keysz: number of key conditions to be checked (might be less than the * number of index columns!) * page/offnum: location of btree item to be compared to. * * This routine returns: * <0 if scankey < tuple at offnum; * 0 if scankey == tuple at offnum; * >0 if scankey > tuple at offnum. * NULLs in the keys are treated as sortable values. Therefore * "equality" does not necessarily mean that the item should be * returned to the caller as a matching key! * * CRUCIAL NOTE: on a non-leaf page, the first data key is assumed to be * "minus infinity": this routine will always claim it is less than the * scankey. The actual key value stored (if any, which there probably isn't) * does not matter. This convention allows us to implement the Lehman and * Yao convention that the first down-link pointer is before the first key. * See backend/access/nbtree/README for details. *---------- */ int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum) { TupleDesc itupdesc = RelationGetDescr(rel); BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); IndexTuple itup; int i; /* * Force result ">" if target item is first data item on an internal page * --- see NOTE above. */ if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) return 1; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); /* * The scan key is set up with the attribute number associated with each * term in the key. It is important that, if the index is multi-key, the * scan contain the first k key attributes, and that they be in order. If * you think about how multi-key ordering works, you'll understand why * this is. * * We don't test for violation of this condition here, however. The * initial setup for the index scan had better have gotten it right (see * _bt_first). */ for (i = 1; i <= keysz; i++) { Datum datum; bool isNull; int32 result; datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull); /* see comments about NULLs handling in btbuild */ if (scankey->sk_flags & SK_ISNULL) /* key is NULL */ { if (isNull) result = 0; /* NULL "=" NULL */ else if (scankey->sk_flags & SK_BT_NULLS_FIRST) result = -1; /* NULL "<" NOT_NULL */ else result = 1; /* NULL ">" NOT_NULL */ } else if (isNull) /* key is NOT_NULL and item is NULL */ { if (scankey->sk_flags & SK_BT_NULLS_FIRST) result = 1; /* NOT_NULL ">" NULL */ else result = -1; /* NOT_NULL "<" NULL */ } else { /* * The sk_func needs to be passed the index value as left arg and * the sk_argument as right arg (they might be of different * types). Since it is convenient for callers to think of * _bt_compare as comparing the scankey to the index item, we have * to flip the sign of the comparison result. (Unless it's a DESC * column, in which case we *don't* flip the sign.) */ result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func, scankey->sk_collation, datum, scankey->sk_argument)); if (!(scankey->sk_flags & SK_BT_DESC)) result = -result; } /* if the keys are unequal, return the difference */ if (result != 0) return result; scankey++; } /* if we get here, the keys are equal */ return 0; }
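/*
 * Illustrative sketch, not part of the original source: a simplified binary
 * search driven by _bt_compare(), in the spirit of _bt_binsrch().  It finds
 * the first offset whose tuple sorts after the scan key; the real routine
 * additionally distinguishes leaf from internal pages.
 */
static OffsetNumber
example_binsrch(Relation rel, int keysz, ScanKey scankey, Page page)
{
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	OffsetNumber low = P_FIRSTDATAKEY(opaque);
	OffsetNumber high = PageGetMaxOffsetNumber(page);

	while (low <= high)
	{
		OffsetNumber mid = low + (high - low) / 2;

		if (_bt_compare(rel, keysz, scankey, page, mid) >= 0)
			low = mid + 1;		/* scankey >= tuple: look right */
		else
			high = mid - 1;		/* scankey < tuple: look left */
	}
	return low;
}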
/* * Read tuples in correct sort order from tuplesort, and load them into * btree leaves. */ static void _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) { BTPageState *state = NULL; bool merge = (btspool2 != NULL); IndexTuple itup, itup2 = NULL; bool load1; TupleDesc tupdes = RelationGetDescr(wstate->index); int i, keysz = RelationGetNumberOfAttributes(wstate->index); ScanKey indexScanKey = NULL; SortSupport sortKeys; if (merge) { /* * Another BTSpool for dead tuples exists. Now we have to merge * btspool and btspool2. */ /* the preparation of merge */ itup = tuplesort_getindextuple(btspool->sortstate, true); itup2 = tuplesort_getindextuple(btspool2->sortstate, true); indexScanKey = _bt_mkscankey_nodata(wstate->index); /* Prepare SortSupport data for each column */ sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData)); for (i = 0; i < keysz; i++) { SortSupport sortKey = sortKeys + i; ScanKey scanKey = indexScanKey + i; int16 strategy; sortKey->ssup_cxt = CurrentMemoryContext; sortKey->ssup_collation = scanKey->sk_collation; sortKey->ssup_nulls_first = (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; sortKey->ssup_attno = scanKey->sk_attno; /* Abbreviation is not supported here */ sortKey->abbreviate = false; AssertState(sortKey->ssup_attno != 0); strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? BTGreaterStrategyNumber : BTLessStrategyNumber; PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey); } _bt_freeskey(indexScanKey); for (;;) { load1 = true; /* load BTSpool next ? */ if (itup2 == NULL) { if (itup == NULL) break; } else if (itup != NULL) { for (i = 1; i <= keysz; i++) { SortSupport entry; Datum attrDatum1, attrDatum2; bool isNull1, isNull2; int32 compare; entry = sortKeys + i - 1; attrDatum1 = index_getattr(itup, i, tupdes, &isNull1); attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2); compare = ApplySortComparator(attrDatum1, isNull1, attrDatum2, isNull2, entry); if (compare > 0) { load1 = false; break; } else if (compare < 0) break; } } else load1 = false; /* When we see first tuple, create first index page */ if (state == NULL) state = _bt_pagestate(wstate, 0); if (load1) { _bt_buildadd(wstate, state, itup); itup = tuplesort_getindextuple(btspool->sortstate, true); } else { _bt_buildadd(wstate, state, itup2); itup2 = tuplesort_getindextuple(btspool2->sortstate, true); } } pfree(sortKeys); } else { /* merge is unnecessary */ while ((itup = tuplesort_getindextuple(btspool->sortstate, true)) != NULL) { /* When we see first tuple, create first index page */ if (state == NULL) state = _bt_pagestate(wstate, 0); _bt_buildadd(wstate, state, itup); } } /* Close down final pages and write the metapage */ _bt_uppershutdown(wstate, state); /* * If the index is WAL-logged, we must fsync it down to disk before it's * safe to commit the transaction. (For a non-WAL-logged index we don't * care since the index will be uninteresting after a crash anyway.) * * It's obvious that we must do this when not WAL-logging the build. It's * less obvious that we have to do it even if we did WAL-log the index * pages. The reason is that since we're building outside shared buffers, * a CHECKPOINT occurring during the build has no way to flush the * previously written data to disk (indeed it won't know the index even * exists). A crash later on would replay WAL from the checkpoint, * therefore it wouldn't replay our earlier WAL entries. If we do not * fsync those pages here, they might still not be on disk when the crash * occurs. 
*/ if (RelationNeedsWAL(wstate->index)) { RelationOpenSmgr(wstate->index); smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM); } }
/*---------- * _bt_compare() -- Compare scankey to a particular tuple on the page. * * keysz: number of key conditions to be checked (might be less than the * total length of the scan key!) * page/offnum: location of btree item to be compared to. * * This routine returns: * <0 if scankey < tuple at offnum; * 0 if scankey == tuple at offnum; * >0 if scankey > tuple at offnum. * NULLs in the keys are treated as sortable values. Therefore * "equality" does not necessarily mean that the item should be * returned to the caller as a matching key! * * CRUCIAL NOTE: on a non-leaf page, the first data key is assumed to be * "minus infinity": this routine will always claim it is less than the * scankey. The actual key value stored (if any, which there probably isn't) * does not matter. This convention allows us to implement the Lehman and * Yao convention that the first down-link pointer is before the first key. * See backend/access/nbtree/README for details. *---------- */ int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum) { TupleDesc itupdesc = RelationGetDescr(rel); BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); BTItem btitem; IndexTuple itup; int i; /* * Force result ">" if target item is first data item on an internal * page --- see NOTE above. */ if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) return 1; btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); itup = &(btitem->bti_itup); /* * The scan key is set up with the attribute number associated with * each term in the key. It is important that, if the index is * multi-key, the scan contain the first k key attributes, and that * they be in order. If you think about how multi-key ordering works, * you'll understand why this is. * * We don't test for violation of this condition here, however. The * initial setup for the index scan had better have gotten it right * (see _bt_first). */ for (i = 0; i < keysz; i++) { ScanKey entry = &scankey[i]; Datum datum; bool isNull; int32 result; datum = index_getattr(itup, entry->sk_attno, itupdesc, &isNull); /* see comments about NULLs handling in btbuild */ if (entry->sk_flags & SK_ISNULL) /* key is NULL */ { if (isNull) result = 0; /* NULL "=" NULL */ else result = 1; /* NULL ">" NOT_NULL */ } else if (isNull) /* key is NOT_NULL and item is NULL */ { result = -1; /* NOT_NULL "<" NULL */ } else { result = DatumGetInt32(FunctionCall2(&entry->sk_func, entry->sk_argument, datum)); } /* if the keys are unequal, return the difference */ if (result != 0) return result; } /* if we get here, the keys are equal */ return 0; }
/* * Test whether an indextuple satisfies a row-comparison scan condition. * * Return true if so, false if not. If not, also clear *continuescan if * it's not possible for any future tuples in the current scan direction * to pass the qual. * * This is a subroutine for _bt_checkkeys, which see for more info. */ static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc, ScanDirection dir, bool *continuescan) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); int32 cmpresult = 0; bool result; /* First subkey should be same as the header says */ Assert(subkey->sk_attno == skey->sk_attno); /* Loop over columns of the row condition */ for (;;) { Datum datum; bool isNull; Assert(subkey->sk_flags & SK_ROW_MEMBER); datum = index_getattr(tuple, subkey->sk_attno, tupdesc, &isNull); if (isNull) { if (subkey->sk_flags & SK_BT_NULLS_FIRST) { /* * Since NULLs are sorted before non-NULLs, we know we have * reached the lower limit of the range of values for this * index attr. On a backward scan, we can stop if this qual is * one of the "must match" subset. On a forward scan, * however, we should keep going. */ if ((subkey->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) *continuescan = false; } else { /* * Since NULLs are sorted after non-NULLs, we know we have * reached the upper limit of the range of values for this * index attr. On a forward scan, we can stop if this qual is * one of the "must match" subset. On a backward scan, * however, we should keep going. */ if ((subkey->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) *continuescan = false; } /* * In any case, this indextuple doesn't match the qual. */ return false; } if (subkey->sk_flags & SK_ISNULL) { /* * Unlike the simple-scankey case, this isn't a disallowed case. * But it can never match. If all the earlier row comparison * columns are required for the scan direction, we can stop the * scan, because there can't be another tuple that will succeed. */ if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument)) subkey--; if ((subkey->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) *continuescan = false; else if ((subkey->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) *continuescan = false; return false; } /* Perform the test --- three-way comparison not bool operator */ cmpresult = DatumGetInt32(FunctionCall2(&subkey->sk_func, datum, subkey->sk_argument)); if (subkey->sk_flags & SK_BT_DESC) cmpresult = -cmpresult; /* Done comparing if unequal, else advance to next column */ if (cmpresult != 0) break; if (subkey->sk_flags & SK_ROW_END) break; subkey++; } /* * At this point cmpresult indicates the overall result of the row * comparison, and subkey points to the deciding column (or the last * column if the result is "="). */ switch (subkey->sk_strategy) { /* EQ and NE cases aren't allowed here */ case BTLessStrategyNumber: result = (cmpresult < 0); break; case BTLessEqualStrategyNumber: result = (cmpresult <= 0); break; case BTGreaterEqualStrategyNumber: result = (cmpresult >= 0); break; case BTGreaterStrategyNumber: result = (cmpresult > 0); break; default: elog(ERROR, "unrecognized RowCompareType: %d", (int) subkey->sk_strategy); result = 0; /* keep compiler quiet */ break; } if (!result) { /* * Tuple fails this qual. If it's a required qual for the current * scan direction, then we can conclude no further tuples will pass, * either. Note we have to look at the deciding column, not * necessarily the first or last column of the row condition. 
*/ if ((subkey->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) *continuescan = false; else if ((subkey->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) *continuescan = false; } return result; }
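/*
 * Illustrative note, not part of the original source: a qual such as
 * WHERE (a, b) > (1, 2) reaches this code as one header ScanKey marked
 * SK_ROW_HEADER whose sk_argument points to an array of member keys; each
 * member is marked SK_ROW_MEMBER and the last also SK_ROW_END, which is
 * exactly the layout the loop above walks.
 */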
/* * Test whether an indextuple satisfies all the scankey conditions. * * If so, copy its TID into scan->xs_ctup.t_self, and return TRUE. * If not, return FALSE (xs_ctup is not changed). * * If the tuple fails to pass the qual, we also determine whether there's * any need to continue the scan beyond this tuple, and set *continuescan * accordingly. See comments for _bt_preprocess_keys(), above, about how * this is done. * * scan: index scan descriptor (containing a search-type scankey) * page: buffer page containing index tuple * offnum: offset number of index tuple (must be a valid item!) * dir: direction we are scanning in * continuescan: output parameter (will be set correctly in all cases) */ bool _bt_checkkeys(IndexScanDesc scan, Page page, OffsetNumber offnum, ScanDirection dir, bool *continuescan) { ItemId iid = PageGetItemId(page, offnum); bool tuple_valid; IndexTuple tuple; TupleDesc tupdesc; BTScanOpaque so; int keysz; int ikey; ScanKey key; *continuescan = true; /* default assumption */ /* * If the scan specifies not to return killed tuples, then we treat a * killed tuple as not passing the qual. Most of the time, it's a win to * not bother examining the tuple's index keys, but just return * immediately with continuescan = true to proceed to the next tuple. * However, if this is the last tuple on the page, we should check the * index keys to prevent uselessly advancing to the next page. */ if (scan->ignore_killed_tuples && ItemIdIsDead(iid)) { /* return immediately if there are more tuples on the page */ if (ScanDirectionIsForward(dir)) { if (offnum < PageGetMaxOffsetNumber(page)) return false; } else { BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (offnum > P_FIRSTDATAKEY(opaque)) return false; } /* * OK, we want to check the keys, but we'll return FALSE even if the * tuple passes the key tests. */ tuple_valid = false; } else tuple_valid = true; tuple = (IndexTuple) PageGetItem(page, iid); IncrIndexProcessed(); tupdesc = RelationGetDescr(scan->indexRelation); so = (BTScanOpaque) scan->opaque; keysz = so->numberOfKeys; for (key = so->keyData, ikey = 0; ikey < keysz; key++, ikey++) { Datum datum; bool isNull; Datum test; /* row-comparison keys need special processing */ if (key->sk_flags & SK_ROW_HEADER) { if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan)) continue; return false; } datum = index_getattr(tuple, key->sk_attno, tupdesc, &isNull); if (key->sk_flags & SK_ISNULL) { /* Handle IS NULL tests */ Assert(key->sk_flags & SK_SEARCHNULL); if (isNull) continue; /* tuple satisfies this qual */ /* * Tuple fails this qual. If it's a required qual for the current * scan direction, then we can conclude no further tuples will * pass, either. */ if ((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) *continuescan = false; else if ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) *continuescan = false; /* * In any case, this indextuple doesn't match the qual. */ return false; } if (isNull) { if (key->sk_flags & SK_BT_NULLS_FIRST) { /* * Since NULLs are sorted before non-NULLs, we know we have * reached the lower limit of the range of values for this * index attr. On a backward scan, we can stop if this qual * is one of the "must match" subset. On a forward scan, * however, we should keep going. 
*/ if ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) *continuescan = false; } else { /* * Since NULLs are sorted after non-NULLs, we know we have * reached the upper limit of the range of values for this * index attr. On a forward scan, we can stop if this qual is * one of the "must match" subset. On a backward scan, * however, we should keep going. */ if ((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) *continuescan = false; } /* * In any case, this indextuple doesn't match the qual. */ return false; } test = FunctionCall2(&key->sk_func, datum, key->sk_argument); if (!DatumGetBool(test)) { /* * Tuple fails this qual. If it's a required qual for the current * scan direction, then we can conclude no further tuples will * pass, either. * * Note: because we stop the scan as soon as any required equality * qual fails, it is critical that equality quals be used for the * initial positioning in _bt_first() when they are available. See * comments in _bt_first(). */ if ((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) *continuescan = false; else if ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) *continuescan = false; /* * In any case, this indextuple doesn't match the qual. */ return false; } } /* If we get here, the tuple passes all index quals. */ if (tuple_valid) scan->xs_ctup.t_self = tuple->t_tid; return tuple_valid; }
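/*
 * Illustrative sketch, not part of the original source: the forward-scan
 * shape of a page reader built on _bt_checkkeys(), stopping early once
 * continuescan is cleared.
 */
static void
example_read_leaf_forward(IndexScanDesc scan, Page page)
{
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	OffsetNumber offnum = P_FIRSTDATAKEY(opaque);
	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
	bool		continuescan = true;

	for (; offnum <= maxoff && continuescan; offnum = OffsetNumberNext(offnum))
	{
		if (_bt_checkkeys(scan, page, offnum, ForwardScanDirection,
						  &continuescan))
		{
			/* scan->xs_ctup.t_self now holds the matching heap TID */
		}
	}
}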
/* * gistindex_keytest() -- does this index tuple satisfy the scan key(s)? * * The index tuple might represent either a heap tuple or a lower index page, * depending on whether the containing page is a leaf page or not. * * On success return for a heap tuple, *recheck_p is set to indicate * whether recheck is needed. We recheck if any of the consistent() functions * request it. recheck is not interesting when examining a non-leaf entry, * since we must visit the lower index page if there's any doubt. * * If we are doing an ordered scan, so->distances[] is filled with distance * data from the distance() functions before returning success. * * We must decompress the key in the IndexTuple before passing it to the * sk_funcs (which actually are the opclass Consistent or Distance methods). * * Note that this function is always invoked in a short-lived memory context, * so we don't need to worry about cleaning up allocated memory, either here * or in the implementation of any Consistent or Distance methods. */ static bool gistindex_keytest(IndexScanDesc scan, IndexTuple tuple, Page page, OffsetNumber offset, bool *recheck_p) { GISTScanOpaque so = (GISTScanOpaque) scan->opaque; GISTSTATE *giststate = so->giststate; ScanKey key = scan->keyData; int keySize = scan->numberOfKeys; double *distance_p; Relation r = scan->indexRelation; *recheck_p = false; /* * If it's a leftover invalid tuple from pre-9.1, treat it as a match with * minimum possible distances. This means we'll always follow it to the * referenced page. * * GPDB: the virtual TIDs created for AO tables use the full range of * offset numbers from 0 to 65535. So a tuple on leaf page that looks like * an invalid tuple, is actually ok. */ if (!GistPageIsLeaf(page) && GistTupleIsInvalid(tuple)) { int i; for (i = 0; i < scan->numberOfOrderBys; i++) so->distances[i] = -get_float8_infinity(); return true; } /* Check whether it matches according to the Consistent functions */ while (keySize > 0) { Datum datum; bool isNull; datum = index_getattr(tuple, key->sk_attno, giststate->tupdesc, &isNull); if (key->sk_flags & SK_ISNULL) { /* * On non-leaf page we can't conclude that child hasn't NULL * values because of assumption in GiST: union (VAL, NULL) is VAL. * But if on non-leaf page key IS NULL, then all children are * NULL. */ if (key->sk_flags & SK_SEARCHNULL) { if (GistPageIsLeaf(page) && !isNull) return false; } else { Assert(key->sk_flags & SK_SEARCHNOTNULL); if (isNull) return false; } } else if (isNull) { return false; } else { Datum test; bool recheck; GISTENTRY de; gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, page, offset, FALSE, isNull); /* * Call the Consistent function to evaluate the test. The * arguments are the index datum (as a GISTENTRY*), the comparison * datum, the comparison operator's strategy number and subtype * from pg_amop, and the recheck flag. * * (Presently there's no need to pass the subtype since it'll * always be zero, but might as well pass it for possible future * use.) * * We initialize the recheck flag to true (the safest assumption) * in case the Consistent function forgets to set it. 
*/ recheck = true; test = FunctionCall5Coll(&key->sk_func, key->sk_collation, PointerGetDatum(&de), key->sk_argument, Int32GetDatum(key->sk_strategy), ObjectIdGetDatum(key->sk_subtype), PointerGetDatum(&recheck)); if (!DatumGetBool(test)) return false; *recheck_p |= recheck; } key++; keySize--; } /* OK, it passes --- now let's compute the distances */ key = scan->orderByData; distance_p = so->distances; keySize = scan->numberOfOrderBys; while (keySize > 0) { Datum datum; bool isNull; datum = index_getattr(tuple, key->sk_attno, giststate->tupdesc, &isNull); if ((key->sk_flags & SK_ISNULL) || isNull) { /* Assume distance computes as null and sorts to the end */ *distance_p = get_float8_infinity(); } else { Datum dist; GISTENTRY de; gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, page, offset, FALSE, isNull); /* * Call the Distance function to evaluate the distance. The * arguments are the index datum (as a GISTENTRY*), the comparison * datum, and the ordering operator's strategy number and subtype * from pg_amop. * * (Presently there's no need to pass the subtype since it'll * always be zero, but might as well pass it for possible future * use.) * * Note that Distance functions don't get a recheck argument. We * can't tolerate lossy distance calculations on leaf tuples; * there is no opportunity to re-sort the tuples afterwards. */ dist = FunctionCall4Coll(&key->sk_func, key->sk_collation, PointerGetDatum(&de), key->sk_argument, Int32GetDatum(key->sk_strategy), ObjectIdGetDatum(key->sk_subtype)); *distance_p = DatumGetFloat8(dist); } key++; distance_p++; keySize--; } return true; }
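/*
 * Illustrative sketch, not part of the original source: applying
 * gistindex_keytest() to one page item and capturing the recheck flag for
 * heap-tuple matches.
 */
static bool
example_test_item(IndexScanDesc scan, Page page, OffsetNumber offset,
				  bool *recheck)
{
	IndexTuple	it;

	it = (IndexTuple) PageGetItem(page, PageGetItemId(page, offset));
	return gistindex_keytest(scan, it, page, offset, recheck);
}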
/* * find entry with lowest penalty */ OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ GISTSTATE *giststate) { OffsetNumber maxoff; OffsetNumber i; OffsetNumber which; float sum_grow, which_grow[INDEX_MAX_KEYS]; GISTENTRY entry, identry[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; maxoff = PageGetMaxOffsetNumber(p); *which_grow = -1.0; which = InvalidOffsetNumber; sum_grow = 1; gistDeCompressAtt(giststate, r, it, NULL, (OffsetNumber) 0, identry, isnull); Assert(maxoff >= FirstOffsetNumber); Assert(!GistPageIsLeaf(p)); for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i)) { int j; IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup)) { ereport(LOG, (errmsg("index \"%s\" needs VACUUM or REINDEX to finish crash recovery", RelationGetRelationName(r)))); continue; } sum_grow = 0; for (j = 0; j < r->rd_att->natts; j++) { Datum datum; float usize; bool IsNull; datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull); gistdentryinit(giststate, j, &entry, datum, r, p, i, FALSE, IsNull); usize = gistpenalty(giststate, j, &entry, IsNull, &identry[j], isnull[j]); if (which_grow[j] < 0 || usize < which_grow[j]) { which = i; which_grow[j] = usize; if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber) which_grow[j + 1] = -1; sum_grow += which_grow[j]; } else if (which_grow[j] == usize) sum_grow += usize; else { sum_grow = 1; break; } } } if (which == InvalidOffsetNumber) which = FirstOffsetNumber; return which; }
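/*
 * Illustrative sketch, not part of the original source: during an insert
 * descent, gistchoose() selects the downlink with the least penalty, and
 * the child block number is read out of that downlink's t_tid, as the GiST
 * insert code does.
 */
static BlockNumber
example_descend_once(Relation r, Page p, IndexTuple it, GISTSTATE *giststate)
{
	OffsetNumber which = gistchoose(r, p, it, giststate);
	IndexTuple	downlink;

	downlink = (IndexTuple) PageGetItem(p, PageGetItemId(p, which));
	return ItemPointerGetBlockNumber(&downlink->t_tid);
}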
bool gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey, Datum *attr, bool *isnull) { int i; GistEntryVector *evec; int attrsize; evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ); for (i = startkey; i < giststate->tupdesc->natts; i++) { int j; evec->n = 0; if (!isnull[i]) { gistentryinit(evec->vector[evec->n], attr[i], NULL, NULL, (OffsetNumber) 0, FALSE); evec->n++; } for (j = 0; j < len; j++) { Datum datum; bool IsNull; if (GistTupleIsInvalid(itvec[j])) return FALSE; /* signals that union with invalid tuple => * result is invalid */ datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull); if (IsNull) continue; gistdentryinit(giststate, i, evec->vector + evec->n, datum, NULL, NULL, (OffsetNumber) 0, FALSE, IsNull); evec->n++; } /* If this tuple vector was all NULLs, the union is NULL */ if (evec->n == 0) { attr[i] = (Datum) 0; isnull[i] = TRUE; } else { if (evec->n == 1) { evec->n = 2; evec->vector[1] = evec->vector[0]; } /* Make union and store in attr array */ attr[i] = FunctionCall2(&giststate->unionFn[i], PointerGetDatum(evec), PointerGetDatum(&attrsize)); isnull[i] = FALSE; } } return TRUE; }