/* * Initialize a BrinBuildState appropriate to create tuples on the given index. */ static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange) { BrinBuildState *state; state = palloc(sizeof(BrinBuildState)); state->bs_irel = idxRel; state->bs_numtuples = 0; state->bs_currentInsertBuf = InvalidBuffer; state->bs_pagesPerRange = pagesPerRange; state->bs_currRangeStart = 0; state->bs_rmAccess = revmap; state->bs_bdesc = brin_build_desc(idxRel); state->bs_dtuple = brin_new_memtuple(state->bs_bdesc); brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); return state; }
/* * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of * brin_form_tuple. * * As an optimization, the caller can pass a previously allocated 'dMemtuple'. * This avoids having to allocate it here, which can be useful when this * function is called many times in a loop. It is caller's responsibility * that the given BrinMemTuple matches what we need here. * * Note we don't need the "on disk tupdesc" here; we rely on our own routine to * deconstruct the tuple from the on-disk format. */ BrinMemTuple * brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple) { BrinMemTuple *dtup; Datum *values; bool *allnulls; bool *hasnulls; char *tp; bits8 *nullbits; int keyno; int valueno; MemoryContext oldcxt; dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) : brin_new_memtuple(brdesc); if (BrinTupleIsPlaceholder(tuple)) dtup->bt_placeholder = true; dtup->bt_blkno = tuple->bt_blkno; values = dtup->bt_values; allnulls = dtup->bt_allnulls; hasnulls = dtup->bt_hasnulls; tp = (char *) tuple + BrinTupleDataOffset(tuple); if (BrinTupleHasNulls(tuple)) nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple); else nullbits = NULL; brin_deconstruct_tuple(brdesc, tp, nullbits, BrinTupleHasNulls(tuple), values, allnulls, hasnulls); /* * Iterate to assign each of the values to the corresponding item in the * values array of each column. The copies occur in the tuple's context. */ oldcxt = MemoryContextSwitchTo(dtup->bt_context); for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) { int i; if (allnulls[keyno]) { valueno += brdesc->bd_info[keyno]->oi_nstored; continue; } /* * We would like to skip datumCopy'ing the values datum in some cases, * caller permitting ... */ for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++) dtup->bt_columns[keyno].bv_values[i] = datumCopy(values[valueno++], brdesc->bd_info[keyno]->oi_typcache[i]->typbyval, brdesc->bd_info[keyno]->oi_typcache[i]->typlen); dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno]; dtup->bt_columns[keyno].bv_allnulls = false; } MemoryContextSwitchTo(oldcxt); return dtup; }
/* * Execute the index scan. * * This works by reading index TIDs from the revmap, and obtaining the index * tuples pointed to by them; the summary values in the index tuples are * compared to the scan keys. We return into the TID bitmap all the pages in * ranges corresponding to index tuples that match the scan keys. * * If a TID from the revmap is read as InvalidTID, we know that range is * unsummarized. Pages in those ranges need to be returned regardless of scan * keys. */ int64 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) { Relation idxRel = scan->indexRelation; Buffer buf = InvalidBuffer; BrinDesc *bdesc; Oid heapOid; Relation heapRel; BrinOpaque *opaque; BlockNumber nblocks; BlockNumber heapBlk; int totalpages = 0; FmgrInfo *consistentFn; MemoryContext oldcxt; MemoryContext perRangeCxt; BrinMemTuple *dtup; BrinTuple *btup = NULL; Size btupsz = 0; opaque = (BrinOpaque *) scan->opaque; bdesc = opaque->bo_bdesc; pgstat_count_index_scan(idxRel); /* * We need to know the size of the table so that we know how long to * iterate on the revmap. */ heapOid = IndexGetRelation(RelationGetRelid(idxRel), false); heapRel = table_open(heapOid, AccessShareLock); nblocks = RelationGetNumberOfBlocks(heapRel); table_close(heapRel, AccessShareLock); /* * Make room for the consistent support procedures of indexed columns. We * don't look them up here; we do that lazily the first time we see a scan * key reference each of them. We rely on zeroing fn_oid to InvalidOid. */ consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts); /* allocate an initial in-memory tuple, out of the per-range memcxt */ dtup = brin_new_memtuple(bdesc); /* * Setup and use a per-range memory context, which is reset every time we * loop below. This avoids having to free the tuples within the loop. */ perRangeCxt = AllocSetContextCreate(CurrentMemoryContext, "bringetbitmap cxt", ALLOCSET_DEFAULT_SIZES); oldcxt = MemoryContextSwitchTo(perRangeCxt); /* * Now scan the revmap. We start by querying for heap page 0, * incrementing by the number of pages per range; this gives us a full * view of the table. */ for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange) { bool addrange; bool gottuple = false; BrinTuple *tup; OffsetNumber off; Size size; CHECK_FOR_INTERRUPTS(); MemoryContextResetAndDeleteChildren(perRangeCxt); tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf, &off, &size, BUFFER_LOCK_SHARE, scan->xs_snapshot); if (tup) { gottuple = true; btup = brin_copy_tuple(tup, size, btup, &btupsz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); } /* * For page ranges with no indexed tuple, we must return the whole * range; otherwise, compare it to the scan keys. */ if (!gottuple) { addrange = true; } else { dtup = brin_deform_tuple(bdesc, btup, dtup); if (dtup->bt_placeholder) { /* * Placeholder tuples are always returned, regardless of the * values stored in them. */ addrange = true; } else { int keyno; /* * Compare scan keys with summary values stored for the range. * If scan keys are matched, the page range must be added to * the bitmap. We initially assume the range needs to be * added; in particular this serves the case where there are * no keys. */ addrange = true; for (keyno = 0; keyno < scan->numberOfKeys; keyno++) { ScanKey key = &scan->keyData[keyno]; AttrNumber keyattno = key->sk_attno; BrinValues *bval = &dtup->bt_columns[keyattno - 1]; Datum add; /* * The collation of the scan key must match the collation * used in the index column (but only if the search is not * IS NULL/ IS NOT NULL). Otherwise we shouldn't be using * this index ... */ Assert((key->sk_flags & SK_ISNULL) || (key->sk_collation == TupleDescAttr(bdesc->bd_tupdesc, keyattno - 1)->attcollation)); /* First time this column? look up consistent function */ if (consistentFn[keyattno - 1].fn_oid == InvalidOid) { FmgrInfo *tmp; tmp = index_getprocinfo(idxRel, keyattno, BRIN_PROCNUM_CONSISTENT); fmgr_info_copy(&consistentFn[keyattno - 1], tmp, CurrentMemoryContext); } /* * Check whether the scan key is consistent with the page * range values; if so, have the pages in the range added * to the output bitmap. * * When there are multiple scan keys, failure to meet the * criteria for a single one of them is enough to discard * the range as a whole, so break out of the loop as soon * as a false return value is obtained. */ add = FunctionCall3Coll(&consistentFn[keyattno - 1], key->sk_collation, PointerGetDatum(bdesc), PointerGetDatum(bval), PointerGetDatum(key)); addrange = DatumGetBool(add); if (!addrange) break; } } } /* add the pages in the range to the output bitmap, if needed */ if (addrange) { BlockNumber pageno; for (pageno = heapBlk; pageno <= heapBlk + opaque->bo_pagesPerRange - 1; pageno++) { MemoryContextSwitchTo(oldcxt); tbm_add_page(tbm, pageno); totalpages++; MemoryContextSwitchTo(perRangeCxt); } } } MemoryContextSwitchTo(oldcxt); MemoryContextDelete(perRangeCxt); if (buf != InvalidBuffer) ReleaseBuffer(buf); /* * XXX We have an approximation of the number of *pages* that our scan * returns, but we don't have a precise idea of the number of heap tuples * involved. */ return totalpages * 10; }