/* * On the given BRIN index, summarize the heap page range that corresponds * to the heap block number given. * * This routine can run in parallel with insertions into the heap. To avoid * missing those values from the summary tuple, we first insert a placeholder * index tuple into the index, then execute the heap scan; transactions * concurrent with the scan update the placeholder tuple. After the scan, we * union the placeholder tuple with the one computed by this routine. The * update of the index value happens in a loop, so that if somebody updates * the placeholder tuple after we read it, we detect the case and try again. * This ensures that the concurrently inserted tuples are not lost. * * A further corner case is this routine being asked to summarize the partial * range at the end of the table. heapNumBlocks is the (possibly outdated) * table size; if we notice that the requested range lies beyond that size, * we re-compute the table size after inserting the placeholder tuple, to * avoid missing pages that were appended recently. */ static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, BlockNumber heapBlk, BlockNumber heapNumBlks) { Buffer phbuf; BrinTuple *phtup; Size phsz; OffsetNumber offset; BlockNumber scanNumBlks; /* * Insert the placeholder tuple */ phbuf = InvalidBuffer; phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz); offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess, &phbuf, heapBlk, phtup, phsz); /* * Compute range end. We hold ShareUpdateExclusive lock on table, so it * cannot shrink concurrently (but it can grow). */ Assert(heapBlk % state->bs_pagesPerRange == 0); if (heapBlk + state->bs_pagesPerRange > heapNumBlks) { /* * If we're asked to scan what we believe to be the final range on the * table (i.e. a range that might be partial) we need to recompute our * idea of what the latest page is after inserting the placeholder * tuple. Anyone that grows the table later will update the * placeholder tuple, so it doesn't matter that we won't scan these * pages ourselves. Careful: the table might have been extended * beyond the current range, so clamp our result. * * Fortunately, this should occur infrequently. */ scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk, state->bs_pagesPerRange); } else { /* Easy case: range is known to be complete */ scanNumBlks = state->bs_pagesPerRange; } /* * Execute the partial heap scan covering the heap blocks in the specified * page range, summarizing the heap tuples in it. This scan stops just * short of brinbuildCallback creating the new index entry. * * Note that it is critical we use the "any visible" mode of * IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted * by transactions that are still in progress, among other corner cases. */ state->bs_currRangeStart = heapBlk; IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true, heapBlk, scanNumBlks, brinbuildCallback, (void *) state); /* * Now we update the values obtained by the scan with the placeholder * tuple. We do this in a loop which only terminates if we're able to * update the placeholder tuple successfully; if we are not, this means * somebody else modified the placeholder tuple after we read it. */ for (;;) { BrinTuple *newtup; Size newsize; bool didupdate; bool samepage; CHECK_FOR_INTERRUPTS(); /* * Update the summary tuple and try to update. */ newtup = brin_form_tuple(state->bs_bdesc, heapBlk, state->bs_dtuple, &newsize); samepage = brin_can_do_samepage_update(phbuf, phsz, newsize); didupdate = brin_doupdate(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess, heapBlk, phbuf, offset, phtup, phsz, newtup, newsize, samepage); brin_free_tuple(phtup); brin_free_tuple(newtup); /* If the update succeeded, we're done. */ if (didupdate) break; /* * If the update didn't work, it might be because somebody updated the * placeholder tuple concurrently. Extract the new version, union it * with the values we have from the scan, and start over. (There are * other reasons for the update to fail, but it's simple to treat them * the same.) */ phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf, &offset, &phsz, BUFFER_LOCK_SHARE, NULL); /* the placeholder tuple must exist */ if (phtup == NULL) elog(ERROR, "missing placeholder tuple"); phtup = brin_copy_tuple(phtup, phsz); LockBuffer(phbuf, BUFFER_LOCK_UNLOCK); /* merge it into the tuple from the heap scan */ union_tuples(state->bs_bdesc, state->bs_dtuple, phtup); } ReleaseBuffer(phbuf); }
/* * Summarize the given page range of the given index. * * This routine can run in parallel with insertions into the heap. To avoid * missing those values from the summary tuple, we first insert a placeholder * index tuple into the index, then execute the heap scan; transactions * concurrent with the scan update the placeholder tuple. After the scan, we * union the placeholder tuple with the one computed by this routine. The * update of the index value happens in a loop, so that if somebody updates * the placeholder tuple after we read it, we detect the case and try again. * This ensures that the concurrently inserted tuples are not lost. */ static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, BlockNumber heapBlk, BlockNumber heapNumBlks) { Buffer phbuf; BrinTuple *phtup; Size phsz; OffsetNumber offset; BlockNumber scanNumBlks; /* * Insert the placeholder tuple */ phbuf = InvalidBuffer; phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz); offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess, &phbuf, heapBlk, phtup, phsz); /* * Execute the partial heap scan covering the heap blocks in the specified * page range, summarizing the heap tuples in it. This scan stops just * short of brinbuildCallback creating the new index entry. * * Note that it is critical we use the "any visible" mode of * IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted * by transactions that are still in progress, among other corner cases. */ state->bs_currRangeStart = heapBlk; scanNumBlks = heapBlk + state->bs_pagesPerRange <= heapNumBlks ? state->bs_pagesPerRange : heapNumBlks - heapBlk; IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true, heapBlk, scanNumBlks, brinbuildCallback, (void *) state); /* * Now we update the values obtained by the scan with the placeholder * tuple. We do this in a loop which only terminates if we're able to * update the placeholder tuple successfully; if we are not, this means * somebody else modified the placeholder tuple after we read it. */ for (;;) { BrinTuple *newtup; Size newsize; bool didupdate; bool samepage; CHECK_FOR_INTERRUPTS(); /* * Update the summary tuple and try to update. */ newtup = brin_form_tuple(state->bs_bdesc, heapBlk, state->bs_dtuple, &newsize); samepage = brin_can_do_samepage_update(phbuf, phsz, newsize); didupdate = brin_doupdate(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess, heapBlk, phbuf, offset, phtup, phsz, newtup, newsize, samepage); brin_free_tuple(phtup); brin_free_tuple(newtup); /* If the update succeeded, we're done. */ if (didupdate) break; /* * If the update didn't work, it might be because somebody updated the * placeholder tuple concurrently. Extract the new version, union it * with the values we have from the scan, and start over. (There are * other reasons for the update to fail, but it's simple to treat them * the same.) */ phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf, &offset, &phsz, BUFFER_LOCK_SHARE, NULL); /* the placeholder tuple must exist */ if (phtup == NULL) elog(ERROR, "missing placeholder tuple"); phtup = brin_copy_tuple(phtup, phsz); LockBuffer(phbuf, BUFFER_LOCK_UNLOCK); /* merge it into the tuple from the heap scan */ union_tuples(state->bs_bdesc, state->bs_dtuple, phtup); } ReleaseBuffer(phbuf); }
/* * A tuple in the heap is being inserted. To keep a brin index up to date, * we need to obtain the relevant index tuple and compare its stored values * with those of the new tuple. If the tuple values are not consistent with * the summary tuple, we need to update the index tuple. * * If the range is not currently summarized (i.e. the revmap returns NULL for * it), there's nothing to do. */ Datum brininsert(PG_FUNCTION_ARGS) { Relation idxRel = (Relation) PG_GETARG_POINTER(0); Datum *values = (Datum *) PG_GETARG_POINTER(1); bool *nulls = (bool *) PG_GETARG_POINTER(2); ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3); /* we ignore the rest of our arguments */ BlockNumber pagesPerRange; BrinDesc *bdesc = NULL; BrinRevmap *revmap; Buffer buf = InvalidBuffer; MemoryContext tupcxt = NULL; MemoryContext oldcxt = NULL; revmap = brinRevmapInitialize(idxRel, &pagesPerRange); for (;;) { bool need_insert = false; OffsetNumber off; BrinTuple *brtup; BrinMemTuple *dtup; BlockNumber heapBlk; int keyno; #ifdef USE_ASSERT_CHECKING BrinTuple *tmptup; BrinMemTuple *tmpdtup; Size tmpsiz; #endif CHECK_FOR_INTERRUPTS(); heapBlk = ItemPointerGetBlockNumber(heaptid); /* normalize the block number to be the first block in the range */ heapBlk = (heapBlk / pagesPerRange) * pagesPerRange; brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE); /* if range is unsummarized, there's nothing to do */ if (!brtup) break; /* First time through? */ if (bdesc == NULL) { bdesc = brin_build_desc(idxRel); tupcxt = AllocSetContextCreate(CurrentMemoryContext, "brininsert cxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldcxt = MemoryContextSwitchTo(tupcxt); } dtup = brin_deform_tuple(bdesc, brtup); #ifdef USE_ASSERT_CHECKING { /* * When assertions are enabled, we use this as an opportunity to * test the "union" method, which would otherwise be used very * rarely: first create a placeholder tuple, and addValue the * value we just got into it. Then union the existing index tuple * with the updated placeholder tuple. The tuple resulting from * that union should be identical to the one resulting from the * regular operation (straight addValue) below. * * Here we create the tuple to compare with; the actual comparison * is below. */ tmptup = brin_form_placeholder_tuple(bdesc, heapBlk, &tmpsiz); tmpdtup = brin_deform_tuple(bdesc, tmptup); for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { BrinValues *bval; FmgrInfo *addValue; bval = &tmpdtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); } union_tuples(bdesc, tmpdtup, brtup); tmpdtup->bt_placeholder = dtup->bt_placeholder; tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz); } #endif /* * Compare the key values of the new tuple to the stored index values; * our deformed tuple will get updated if the new tuple doesn't fit * the original range (note this means we can't break out of the loop * early). Make a note of whether this happens, so that we know to * insert the modified tuple later. */ for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { Datum result; BrinValues *bval; FmgrInfo *addValue; bval = &dtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); /* if that returned true, we need to insert the updated tuple */ need_insert |= DatumGetBool(result); } #ifdef USE_ASSERT_CHECKING { /* * Now we can compare the tuple produced by the union function * with the one from plain addValue. */ BrinTuple *cmptup; Size cmpsz; cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz); Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz)); } #endif if (!need_insert) { /* * The tuple is consistent with the new values, so there's nothing * to do. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); } else { Page page = BufferGetPage(buf); ItemId lp = PageGetItemId(page, off); Size origsz; BrinTuple *origtup; Size newsz; BrinTuple *newtup; bool samepage; /* * Make a copy of the old tuple, so that we can compare it after * re-acquiring the lock. */ origsz = ItemIdGetLength(lp); origtup = brin_copy_tuple(brtup, origsz); /* * Before releasing the lock, check if we can attempt a same-page * update. Another process could insert a tuple concurrently in * the same page though, so downstream we must be prepared to cope * if this turns out to not be possible after all. */ newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz); samepage = brin_can_do_samepage_update(buf, origsz, newsz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* * Try to update the tuple. If this doesn't work for whatever * reason, we need to restart from the top; the revmap might be * pointing at a different tuple for this block now, so we need to * recompute to ensure both our new heap tuple and the other * inserter's are covered by the combined tuple. It might be that * we don't need to update at all. */ if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk, buf, off, origtup, origsz, newtup, newsz, samepage)) { /* no luck; start over */ MemoryContextResetAndDeleteChildren(tupcxt); continue; } } /* success! */ break; } brinRevmapTerminate(revmap); if (BufferIsValid(buf)) ReleaseBuffer(buf); if (bdesc != NULL) { brin_free_desc(bdesc); MemoryContextSwitchTo(oldcxt); MemoryContextDelete(tupcxt); } return BoolGetDatum(false); }