/* * Per-heap-tuple callback for IndexBuildHeapScan. * * Note we don't worry about the page range at the end of the table here; it is * present in the build state struct after we're called the last time, but not * inserted into the index. Caller must ensure to do so, if appropriate. */ static void brinbuildCallback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *brstate) { BrinBuildState *state = (BrinBuildState *) brstate; BlockNumber thisblock; int i; thisblock = ItemPointerGetBlockNumber(&htup->t_self); /* * If we're in a block that belongs to a future range, summarize what * we've got and start afresh. Note the scan might have skipped many * pages, if they were devoid of live tuples; make sure to insert index * tuples for those too. */ while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1) { BRIN_elog((DEBUG2, "brinbuildCallback: completed a range: %u--%u", state->bs_currRangeStart, state->bs_currRangeStart + state->bs_pagesPerRange)); /* create the index tuple and insert it */ form_and_insert_tuple(state); /* set state to correspond to the next range */ state->bs_currRangeStart += state->bs_pagesPerRange; /* re-initialize state for it */ brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); } /* Accumulate the current tuple into the running state */ for (i = 0; i < state->bs_bdesc->bd_tupdesc->natts; i++) { FmgrInfo *addValue; BrinValues *col; Form_pg_attribute attr = TupleDescAttr(state->bs_bdesc->bd_tupdesc, i); col = &state->bs_dtuple->bt_columns[i]; addValue = index_getprocinfo(index, i + 1, BRIN_PROCNUM_ADDVALUE); /* * Update dtuple state, if and as necessary. */ FunctionCall4Coll(addValue, attr->attcollation, PointerGetDatum(state->bs_bdesc), PointerGetDatum(col), values[i], isnull[i]); } }
/* * Initialize a BrinBuildState appropriate to create tuples on the given index. */ static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange) { BrinBuildState *state; state = palloc(sizeof(BrinBuildState)); state->bs_irel = idxRel; state->bs_numtuples = 0; state->bs_currentInsertBuf = InvalidBuffer; state->bs_pagesPerRange = pagesPerRange; state->bs_currRangeStart = 0; state->bs_rmAccess = revmap; state->bs_bdesc = brin_build_desc(idxRel); state->bs_dtuple = brin_new_memtuple(state->bs_bdesc); brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); return state; }
/* * Create a new BrinMemTuple from scratch, and initialize it to an empty * state. * * Note: we don't provide any means to free a deformed tuple, so make sure to * use a temporary memory context. */ BrinMemTuple * brin_new_memtuple(BrinDesc *brdesc) { BrinMemTuple *dtup; long basesize; basesize = MAXALIGN(sizeof(BrinMemTuple) + sizeof(BrinValues) * brdesc->bd_tupdesc->natts); dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored); dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored); dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext, "brin dtuple", ALLOCSET_DEFAULT_SIZES); brin_memtuple_initialize(dtup, brdesc); return dtup; }
/* * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of * brin_form_tuple. * * As an optimization, the caller can pass a previously allocated 'dMemtuple'. * This avoids having to allocate it here, which can be useful when this * function is called many times in a loop. It is caller's responsibility * that the given BrinMemTuple matches what we need here. * * Note we don't need the "on disk tupdesc" here; we rely on our own routine to * deconstruct the tuple from the on-disk format. */ BrinMemTuple * brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple) { BrinMemTuple *dtup; Datum *values; bool *allnulls; bool *hasnulls; char *tp; bits8 *nullbits; int keyno; int valueno; MemoryContext oldcxt; dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) : brin_new_memtuple(brdesc); if (BrinTupleIsPlaceholder(tuple)) dtup->bt_placeholder = true; dtup->bt_blkno = tuple->bt_blkno; values = dtup->bt_values; allnulls = dtup->bt_allnulls; hasnulls = dtup->bt_hasnulls; tp = (char *) tuple + BrinTupleDataOffset(tuple); if (BrinTupleHasNulls(tuple)) nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple); else nullbits = NULL; brin_deconstruct_tuple(brdesc, tp, nullbits, BrinTupleHasNulls(tuple), values, allnulls, hasnulls); /* * Iterate to assign each of the values to the corresponding item in the * values array of each column. The copies occur in the tuple's context. */ oldcxt = MemoryContextSwitchTo(dtup->bt_context); for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) { int i; if (allnulls[keyno]) { valueno += brdesc->bd_info[keyno]->oi_nstored; continue; } /* * We would like to skip datumCopy'ing the values datum in some cases, * caller permitting ... */ for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++) dtup->bt_columns[keyno].bv_values[i] = datumCopy(values[valueno++], brdesc->bd_info[keyno]->oi_typcache[i]->typbyval, brdesc->bd_info[keyno]->oi_typcache[i]->typlen); dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno]; dtup->bt_columns[keyno].bv_allnulls = false; } MemoryContextSwitchTo(oldcxt); return dtup; }
/* * Scan a complete BRIN index, and summarize each page range that's not already * summarized. The index and heap must have been locked by caller in at * least ShareUpdateExclusiveLock mode. * * For each new index tuple inserted, *numSummarized (if not NULL) is * incremented; for each existing tuple, *numExisting (if not NULL) is * incremented. */ static void brinsummarize(Relation index, Relation heapRel, double *numSummarized, double *numExisting) { BrinRevmap *revmap; BrinBuildState *state = NULL; IndexInfo *indexInfo = NULL; BlockNumber heapNumBlocks; BlockNumber heapBlk; BlockNumber pagesPerRange; Buffer buf; revmap = brinRevmapInitialize(index, &pagesPerRange, NULL); /* * Scan the revmap to find unsummarized items. */ buf = InvalidBuffer; heapNumBlocks = RelationGetNumberOfBlocks(heapRel); for (heapBlk = 0; heapBlk < heapNumBlocks; heapBlk += pagesPerRange) { BrinTuple *tup; OffsetNumber off; CHECK_FOR_INTERRUPTS(); tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE, NULL); if (tup == NULL) { /* no revmap entry for this heap range. Summarize it. */ if (state == NULL) { /* first time through */ Assert(!indexInfo); state = initialize_brin_buildstate(index, revmap, pagesPerRange); indexInfo = BuildIndexInfo(index); } summarize_range(indexInfo, state, heapRel, heapBlk, heapNumBlocks); /* and re-initialize state for the next range */ brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); if (numSummarized) *numSummarized += 1.0; } else { if (numExisting) *numExisting += 1.0; LockBuffer(buf, BUFFER_LOCK_UNLOCK); } } if (BufferIsValid(buf)) ReleaseBuffer(buf); /* free resources */ brinRevmapTerminate(revmap); if (state) { terminate_brin_buildstate(state); pfree(indexInfo); } }
/* * Scan a complete BRIN index, and summarize each page range that's not already * summarized. The index and heap must have been locked by caller in at * least ShareUpdateExclusiveLock mode. * * For each new index tuple inserted, *numSummarized (if not NULL) is * incremented; for each existing tuple, numExisting (if not NULL) is * incremented. */ static void brinsummarize(Relation index, Relation heapRel, double *numSummarized, double *numExisting) { BrinRevmap *revmap; BrinBuildState *state = NULL; IndexInfo *indexInfo = NULL; BlockNumber heapNumBlocks; BlockNumber heapBlk; BlockNumber pagesPerRange; Buffer buf; revmap = brinRevmapInitialize(index, &pagesPerRange); /* * Scan the revmap to find unsummarized items. */ buf = InvalidBuffer; heapNumBlocks = RelationGetNumberOfBlocks(heapRel); for (heapBlk = 0; heapBlk < heapNumBlocks; heapBlk += pagesPerRange) { BrinTuple *tup; OffsetNumber off; CHECK_FOR_INTERRUPTS(); tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE); if (tup == NULL) { /* no revmap entry for this heap range. Summarize it. */ if (state == NULL) { /* first time through */ Assert(!indexInfo); state = initialize_brin_buildstate(index, revmap, pagesPerRange); indexInfo = BuildIndexInfo(index); /* * We only have ShareUpdateExclusiveLock on the table, and * therefore other sessions may insert tuples into the range * we're going to scan. This is okay, because we take * additional precautions to avoid losing the additional * tuples; see comments in summarize_range. Set the * concurrent flag, which causes IndexBuildHeapRangeScan to * use a snapshot other than SnapshotAny, and silences * warnings emitted there. */ indexInfo->ii_Concurrent = true; /* * If using transaction-snapshot mode, it would be possible * for another transaction to insert a tuple that's not * visible to our snapshot if we have already acquired one, * when in snapshot-isolation mode; therefore, disallow this * from running in such a transaction unless a snapshot hasn't * been acquired yet. * * This code is called by VACUUM and * brin_summarize_new_values. Have the error message mention * the latter because VACUUM cannot run in a transaction and * thus cannot cause this issue. */ if (IsolationUsesXactSnapshot() && FirstSnapshotSet) ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("brin_summarize_new_values() cannot run in a transaction that has already obtained a snapshot"))); } summarize_range(indexInfo, state, heapRel, heapBlk); /* and re-initialize state for the next range */ brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); if (numSummarized) *numSummarized += 1.0; } else { if (numExisting) *numExisting += 1.0; LockBuffer(buf, BUFFER_LOCK_UNLOCK); } } if (BufferIsValid(buf)) ReleaseBuffer(buf); /* free resources */ brinRevmapTerminate(revmap); if (state) terminate_brin_buildstate(state); }
/* * Summarize page ranges that are not already summarized. If pageRange is * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the * page range containing the given heap page number is scanned. * If include_partial is true, then the partial range at the end of the table * is summarized, otherwise not. * * For each new index tuple inserted, *numSummarized (if not NULL) is * incremented; for each existing tuple, *numExisting (if not NULL) is * incremented. */ static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting) { BrinRevmap *revmap; BrinBuildState *state = NULL; IndexInfo *indexInfo = NULL; BlockNumber heapNumBlocks; BlockNumber pagesPerRange; Buffer buf; BlockNumber startBlk; revmap = brinRevmapInitialize(index, &pagesPerRange, NULL); /* determine range of pages to process */ heapNumBlocks = RelationGetNumberOfBlocks(heapRel); if (pageRange == BRIN_ALL_BLOCKRANGES) startBlk = 0; else { startBlk = (pageRange / pagesPerRange) * pagesPerRange; heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange); } if (startBlk > heapNumBlocks) { /* Nothing to do if start point is beyond end of table */ brinRevmapTerminate(revmap); return; } /* * Scan the revmap to find unsummarized items. */ buf = InvalidBuffer; for (; startBlk < heapNumBlocks; startBlk += pagesPerRange) { BrinTuple *tup; OffsetNumber off; /* * Unless requested to summarize even a partial range, go away now if * we think the next range is partial. Caller would pass true when it * is typically run once bulk data loading is done * (brin_summarize_new_values), and false when it is typically the * result of arbitrarily-scheduled maintenance command (vacuuming). */ if (!include_partial && (startBlk + pagesPerRange > heapNumBlocks)) break; CHECK_FOR_INTERRUPTS(); tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE, NULL); if (tup == NULL) { /* no revmap entry for this heap range. Summarize it. */ if (state == NULL) { /* first time through */ Assert(!indexInfo); state = initialize_brin_buildstate(index, revmap, pagesPerRange); indexInfo = BuildIndexInfo(index); } summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks); /* and re-initialize state for the next range */ brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); if (numSummarized) *numSummarized += 1.0; } else { if (numExisting) *numExisting += 1.0; LockBuffer(buf, BUFFER_LOCK_UNLOCK); } } if (BufferIsValid(buf)) ReleaseBuffer(buf); /* free resources */ brinRevmapTerminate(revmap); if (state) { terminate_brin_buildstate(state); pfree(indexInfo); } }