/* * Per-heap-tuple callback for IndexBuildHeapScan. * * Note we don't worry about the page range at the end of the table here; it is * present in the build state struct after we're called the last time, but not * inserted into the index. Caller must ensure to do so, if appropriate. */ static void brinbuildCallback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate) { BrinBuildState *state = (BrinBuildState *) brstate; BlockNumber thisblock; int i; thisblock = ItemPointerGetBlockNumber(&htup->t_self); /* * If we're in a block that belongs to a future range, summarize what * we've got and start afresh. Note the scan might have skipped many * pages, if they were devoid of live tuples; make sure to insert index * tuples for those too. */ while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1) { BRIN_elog((DEBUG2, "brinbuildCallback: completed a range: %u--%u", state->bs_currRangeStart, state->bs_currRangeStart + state->bs_pagesPerRange)); /* create the index tuple and insert it */ form_and_insert_tuple(state); /* set state to correspond to the next range */ state->bs_currRangeStart += state->bs_pagesPerRange; /* re-initialize state for it */ brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); } /* Accumulate the current tuple into the running state */ for (i = 0; i < state->bs_bdesc->bd_tupdesc->natts; i++) { FmgrInfo *addValue; BrinValues *col; Form_pg_attribute attr = TupleDescAttr(state->bs_bdesc->bd_tupdesc, i); col = &state->bs_dtuple->bt_columns[i]; addValue = index_getprocinfo(index, i + 1, BRIN_PROCNUM_ADDVALUE); /* * Update dtuple state, if and as necessary. */ FunctionCall4Coll(addValue, attr->attcollation, PointerGetDatum(state->bs_bdesc), PointerGetDatum(col), values[i], isnull[i]); } }
/* * gistindex_keytest() -- does this index tuple satisfy the scan key(s)? * * The index tuple might represent either a heap tuple or a lower index page, * depending on whether the containing page is a leaf page or not. * * On success return for a heap tuple, *recheck_p is set to indicate * whether recheck is needed. We recheck if any of the consistent() functions * request it. recheck is not interesting when examining a non-leaf entry, * since we must visit the lower index page if there's any doubt. * * If we are doing an ordered scan, so->distances[] is filled with distance * data from the distance() functions before returning success. * * We must decompress the key in the IndexTuple before passing it to the * sk_funcs (which actually are the opclass Consistent or Distance methods). * * Note that this function is always invoked in a short-lived memory context, * so we don't need to worry about cleaning up allocated memory, either here * or in the implementation of any Consistent or Distance methods. */ static bool gistindex_keytest(IndexScanDesc scan, IndexTuple tuple, Page page, OffsetNumber offset, bool *recheck_p) { GISTScanOpaque so = (GISTScanOpaque) scan->opaque; GISTSTATE *giststate = so->giststate; ScanKey key = scan->keyData; int keySize = scan->numberOfKeys; double *distance_p; Relation r = scan->indexRelation; *recheck_p = false; /* * If it's a leftover invalid tuple from pre-9.1, treat it as a match with * minimum possible distances. This means we'll always follow it to the * referenced page. * * GPDB: the virtual TIDs created for AO tables use the full range of * offset numbers from 0 to 65535. So a tuple on leaf page that looks like * an invalid tuple, is actually ok. */ if (!GistPageIsLeaf(page) && GistTupleIsInvalid(tuple)) { int i; for (i = 0; i < scan->numberOfOrderBys; i++) so->distances[i] = -get_float8_infinity(); return true; } /* Check whether it matches according to the Consistent functions */ while (keySize > 0) { Datum datum; bool isNull; datum = index_getattr(tuple, key->sk_attno, giststate->tupdesc, &isNull); if (key->sk_flags & SK_ISNULL) { /* * On non-leaf page we can't conclude that child hasn't NULL * values because of assumption in GiST: union (VAL, NULL) is VAL. * But if on non-leaf page key IS NULL, then all children are * NULL. */ if (key->sk_flags & SK_SEARCHNULL) { if (GistPageIsLeaf(page) && !isNull) return false; } else { Assert(key->sk_flags & SK_SEARCHNOTNULL); if (isNull) return false; } } else if (isNull) { return false; } else { Datum test; bool recheck; GISTENTRY de; gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, page, offset, FALSE, isNull); /* * Call the Consistent function to evaluate the test. The * arguments are the index datum (as a GISTENTRY*), the comparison * datum, the comparison operator's strategy number and subtype * from pg_amop, and the recheck flag. * * (Presently there's no need to pass the subtype since it'll * always be zero, but might as well pass it for possible future * use.) * * We initialize the recheck flag to true (the safest assumption) * in case the Consistent function forgets to set it. */ recheck = true; test = FunctionCall5Coll(&key->sk_func, key->sk_collation, PointerGetDatum(&de), key->sk_argument, Int32GetDatum(key->sk_strategy), ObjectIdGetDatum(key->sk_subtype), PointerGetDatum(&recheck)); if (!DatumGetBool(test)) return false; *recheck_p |= recheck; } key++; keySize--; } /* OK, it passes --- now let's compute the distances */ key = scan->orderByData; distance_p = so->distances; keySize = scan->numberOfOrderBys; while (keySize > 0) { Datum datum; bool isNull; datum = index_getattr(tuple, key->sk_attno, giststate->tupdesc, &isNull); if ((key->sk_flags & SK_ISNULL) || isNull) { /* Assume distance computes as null and sorts to the end */ *distance_p = get_float8_infinity(); } else { Datum dist; GISTENTRY de; gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, page, offset, FALSE, isNull); /* * Call the Distance function to evaluate the distance. The * arguments are the index datum (as a GISTENTRY*), the comparison * datum, and the ordering operator's strategy number and subtype * from pg_amop. * * (Presently there's no need to pass the subtype since it'll * always be zero, but might as well pass it for possible future * use.) * * Note that Distance functions don't get a recheck argument. We * can't tolerate lossy distance calculations on leaf tuples; * there is no opportunity to re-sort the tuples afterwards. */ dist = FunctionCall4Coll(&key->sk_func, key->sk_collation, PointerGetDatum(&de), key->sk_argument, Int32GetDatum(key->sk_strategy), ObjectIdGetDatum(key->sk_subtype)); *distance_p = DatumGetFloat8(dist); } key++; distance_p++; keySize--; } return true; }
/* * A tuple in the heap is being inserted. To keep a brin index up to date, * we need to obtain the relevant index tuple and compare its stored values * with those of the new tuple. If the tuple values are not consistent with * the summary tuple, we need to update the index tuple. * * If the range is not currently summarized (i.e. the revmap returns NULL for * it), there's nothing to do. */ bool brininsert(Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique) { BlockNumber pagesPerRange; BrinDesc *bdesc = NULL; BrinRevmap *revmap; Buffer buf = InvalidBuffer; MemoryContext tupcxt = NULL; MemoryContext oldcxt = NULL; revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL); for (;;) { bool need_insert = false; OffsetNumber off; BrinTuple *brtup; BrinMemTuple *dtup; BlockNumber heapBlk; int keyno; CHECK_FOR_INTERRUPTS(); heapBlk = ItemPointerGetBlockNumber(heaptid); /* normalize the block number to be the first block in the range */ heapBlk = (heapBlk / pagesPerRange) * pagesPerRange; brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE, NULL); /* if range is unsummarized, there's nothing to do */ if (!brtup) break; /* First time through? */ if (bdesc == NULL) { bdesc = brin_build_desc(idxRel); tupcxt = AllocSetContextCreate(CurrentMemoryContext, "brininsert cxt", ALLOCSET_DEFAULT_SIZES); oldcxt = MemoryContextSwitchTo(tupcxt); } dtup = brin_deform_tuple(bdesc, brtup); /* * Compare the key values of the new tuple to the stored index values; * our deformed tuple will get updated if the new tuple doesn't fit * the original range (note this means we can't break out of the loop * early). Make a note of whether this happens, so that we know to * insert the modified tuple later. */ for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { Datum result; BrinValues *bval; FmgrInfo *addValue; bval = &dtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); /* if that returned true, we need to insert the updated tuple */ need_insert |= DatumGetBool(result); } if (!need_insert) { /* * The tuple is consistent with the new values, so there's nothing * to do. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); } else { Page page = BufferGetPage(buf); ItemId lp = PageGetItemId(page, off); Size origsz; BrinTuple *origtup; Size newsz; BrinTuple *newtup; bool samepage; /* * Make a copy of the old tuple, so that we can compare it after * re-acquiring the lock. */ origsz = ItemIdGetLength(lp); origtup = brin_copy_tuple(brtup, origsz); /* * Before releasing the lock, check if we can attempt a same-page * update. Another process could insert a tuple concurrently in * the same page though, so downstream we must be prepared to cope * if this turns out to not be possible after all. */ newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz); samepage = brin_can_do_samepage_update(buf, origsz, newsz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* * Try to update the tuple. If this doesn't work for whatever * reason, we need to restart from the top; the revmap might be * pointing at a different tuple for this block now, so we need to * recompute to ensure both our new heap tuple and the other * inserter's are covered by the combined tuple. It might be that * we don't need to update at all. */ if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk, buf, off, origtup, origsz, newtup, newsz, samepage)) { /* no luck; start over */ MemoryContextResetAndDeleteChildren(tupcxt); continue; } } /* success! */ break; } brinRevmapTerminate(revmap); if (BufferIsValid(buf)) ReleaseBuffer(buf); if (bdesc != NULL) { brin_free_desc(bdesc); MemoryContextSwitchTo(oldcxt); MemoryContextDelete(tupcxt); } return false; }
/* * A tuple in the heap is being inserted. To keep a brin index up to date, * we need to obtain the relevant index tuple and compare its stored values * with those of the new tuple. If the tuple values are not consistent with * the summary tuple, we need to update the index tuple. * * If the range is not currently summarized (i.e. the revmap returns NULL for * it), there's nothing to do. */ Datum brininsert(PG_FUNCTION_ARGS) { Relation idxRel = (Relation) PG_GETARG_POINTER(0); Datum *values = (Datum *) PG_GETARG_POINTER(1); bool *nulls = (bool *) PG_GETARG_POINTER(2); ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3); /* we ignore the rest of our arguments */ BlockNumber pagesPerRange; BrinDesc *bdesc = NULL; BrinRevmap *revmap; Buffer buf = InvalidBuffer; MemoryContext tupcxt = NULL; MemoryContext oldcxt = NULL; revmap = brinRevmapInitialize(idxRel, &pagesPerRange); for (;;) { bool need_insert = false; OffsetNumber off; BrinTuple *brtup; BrinMemTuple *dtup; BlockNumber heapBlk; int keyno; #ifdef USE_ASSERT_CHECKING BrinTuple *tmptup; BrinMemTuple *tmpdtup; Size tmpsiz; #endif CHECK_FOR_INTERRUPTS(); heapBlk = ItemPointerGetBlockNumber(heaptid); /* normalize the block number to be the first block in the range */ heapBlk = (heapBlk / pagesPerRange) * pagesPerRange; brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE); /* if range is unsummarized, there's nothing to do */ if (!brtup) break; /* First time through? */ if (bdesc == NULL) { bdesc = brin_build_desc(idxRel); tupcxt = AllocSetContextCreate(CurrentMemoryContext, "brininsert cxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldcxt = MemoryContextSwitchTo(tupcxt); } dtup = brin_deform_tuple(bdesc, brtup); #ifdef USE_ASSERT_CHECKING { /* * When assertions are enabled, we use this as an opportunity to * test the "union" method, which would otherwise be used very * rarely: first create a placeholder tuple, and addValue the * value we just got into it. Then union the existing index tuple * with the updated placeholder tuple. The tuple resulting from * that union should be identical to the one resulting from the * regular operation (straight addValue) below. * * Here we create the tuple to compare with; the actual comparison * is below. */ tmptup = brin_form_placeholder_tuple(bdesc, heapBlk, &tmpsiz); tmpdtup = brin_deform_tuple(bdesc, tmptup); for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { BrinValues *bval; FmgrInfo *addValue; bval = &tmpdtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); } union_tuples(bdesc, tmpdtup, brtup); tmpdtup->bt_placeholder = dtup->bt_placeholder; tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz); } #endif /* * Compare the key values of the new tuple to the stored index values; * our deformed tuple will get updated if the new tuple doesn't fit * the original range (note this means we can't break out of the loop * early). Make a note of whether this happens, so that we know to * insert the modified tuple later. */ for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { Datum result; BrinValues *bval; FmgrInfo *addValue; bval = &dtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); /* if that returned true, we need to insert the updated tuple */ need_insert |= DatumGetBool(result); } #ifdef USE_ASSERT_CHECKING { /* * Now we can compare the tuple produced by the union function * with the one from plain addValue. */ BrinTuple *cmptup; Size cmpsz; cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz); Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz)); } #endif if (!need_insert) { /* * The tuple is consistent with the new values, so there's nothing * to do. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); } else { Page page = BufferGetPage(buf); ItemId lp = PageGetItemId(page, off); Size origsz; BrinTuple *origtup; Size newsz; BrinTuple *newtup; bool samepage; /* * Make a copy of the old tuple, so that we can compare it after * re-acquiring the lock. */ origsz = ItemIdGetLength(lp); origtup = brin_copy_tuple(brtup, origsz); /* * Before releasing the lock, check if we can attempt a same-page * update. Another process could insert a tuple concurrently in * the same page though, so downstream we must be prepared to cope * if this turns out to not be possible after all. */ newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz); samepage = brin_can_do_samepage_update(buf, origsz, newsz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* * Try to update the tuple. If this doesn't work for whatever * reason, we need to restart from the top; the revmap might be * pointing at a different tuple for this block now, so we need to * recompute to ensure both our new heap tuple and the other * inserter's are covered by the combined tuple. It might be that * we don't need to update at all. */ if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk, buf, off, origtup, origsz, newtup, newsz, samepage)) { /* no luck; start over */ MemoryContextResetAndDeleteChildren(tupcxt); continue; } } /* success! */ break; } brinRevmapTerminate(revmap); if (BufferIsValid(buf)) ReleaseBuffer(buf); if (bdesc != NULL) { brin_free_desc(bdesc); MemoryContextSwitchTo(oldcxt); MemoryContextDelete(tupcxt); } return BoolGetDatum(false); }
/* * A tuple in the heap is being inserted. To keep a brin index up to date, * we need to obtain the relevant index tuple and compare its stored values * with those of the new tuple. If the tuple values are not consistent with * the summary tuple, we need to update the index tuple. * * If autosummarization is enabled, check if we need to summarize the previous * page range. * * If the range is not currently summarized (i.e. the revmap returns NULL for * it), there's nothing to do for this tuple. */ bool brininsert(Relation idxRel, Datum *values, bool *nulls, ItemPointer heaptid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo) { BlockNumber pagesPerRange; BlockNumber origHeapBlk; BlockNumber heapBlk; BrinDesc *bdesc = (BrinDesc *) indexInfo->ii_AmCache; BrinRevmap *revmap; Buffer buf = InvalidBuffer; MemoryContext tupcxt = NULL; MemoryContext oldcxt = CurrentMemoryContext; bool autosummarize = BrinGetAutoSummarize(idxRel); revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL); /* * origHeapBlk is the block number where the insertion occurred. heapBlk * is the first block in the corresponding page range. */ origHeapBlk = ItemPointerGetBlockNumber(heaptid); heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange; for (;;) { bool need_insert = false; OffsetNumber off; BrinTuple *brtup; BrinMemTuple *dtup; int keyno; CHECK_FOR_INTERRUPTS(); /* * If auto-summarization is enabled and we just inserted the first * tuple into the first block of a new non-first page range, request a * summarization run of the previous range. */ if (autosummarize && heapBlk > 0 && heapBlk == origHeapBlk && ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber) { BlockNumber lastPageRange = heapBlk - 1; BrinTuple *lastPageTuple; lastPageTuple = brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off, NULL, BUFFER_LOCK_SHARE, NULL); if (!lastPageTuple) { bool recorded; recorded = AutoVacuumRequestWork(AVW_BRINSummarizeRange, RelationGetRelid(idxRel), lastPageRange); if (!recorded) ereport(LOG, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded", RelationGetRelationName(idxRel), lastPageRange))); } else LockBuffer(buf, BUFFER_LOCK_UNLOCK); } brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE, NULL); /* if range is unsummarized, there's nothing to do */ if (!brtup) break; /* First time through in this statement? */ if (bdesc == NULL) { MemoryContextSwitchTo(indexInfo->ii_Context); bdesc = brin_build_desc(idxRel); indexInfo->ii_AmCache = (void *) bdesc; MemoryContextSwitchTo(oldcxt); } /* First time through in this brininsert call? */ if (tupcxt == NULL) { tupcxt = AllocSetContextCreate(CurrentMemoryContext, "brininsert cxt", ALLOCSET_DEFAULT_SIZES); MemoryContextSwitchTo(tupcxt); } dtup = brin_deform_tuple(bdesc, brtup, NULL); /* * Compare the key values of the new tuple to the stored index values; * our deformed tuple will get updated if the new tuple doesn't fit * the original range (note this means we can't break out of the loop * early). Make a note of whether this happens, so that we know to * insert the modified tuple later. */ for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { Datum result; BrinValues *bval; FmgrInfo *addValue; bval = &dtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); /* if that returned true, we need to insert the updated tuple */ need_insert |= DatumGetBool(result); } if (!need_insert) { /* * The tuple is consistent with the new values, so there's nothing * to do. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); } else { Page page = BufferGetPage(buf); ItemId lp = PageGetItemId(page, off); Size origsz; BrinTuple *origtup; Size newsz; BrinTuple *newtup; bool samepage; /* * Make a copy of the old tuple, so that we can compare it after * re-acquiring the lock. */ origsz = ItemIdGetLength(lp); origtup = brin_copy_tuple(brtup, origsz, NULL, NULL); /* * Before releasing the lock, check if we can attempt a same-page * update. Another process could insert a tuple concurrently in * the same page though, so downstream we must be prepared to cope * if this turns out to not be possible after all. */ newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz); samepage = brin_can_do_samepage_update(buf, origsz, newsz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* * Try to update the tuple. If this doesn't work for whatever * reason, we need to restart from the top; the revmap might be * pointing at a different tuple for this block now, so we need to * recompute to ensure both our new heap tuple and the other * inserter's are covered by the combined tuple. It might be that * we don't need to update at all. */ if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk, buf, off, origtup, origsz, newtup, newsz, samepage)) { /* no luck; start over */ MemoryContextResetAndDeleteChildren(tupcxt); continue; } } /* success! */ break; } brinRevmapTerminate(revmap); if (BufferIsValid(buf)) ReleaseBuffer(buf); MemoryContextSwitchTo(oldcxt); if (tupcxt != NULL) MemoryContextDelete(tupcxt); return false; }