/*
 * SQL-callable function to scan through an index and summarize all ranges
 * that are not currently summarized.
 */
Datum
brin_summarize_new_values(PG_FUNCTION_ARGS)
{
    Oid         indexoid = PG_GETARG_OID(0);
    Oid         heapoid;
    Relation    indexRel;
    Relation    heapRel;
    double      numSummarized = 0;

    /*
     * We must lock table before index to avoid deadlocks.  However, if the
     * passed indexoid isn't an index then IndexGetRelation() will fail.
     * Rather than emitting a not-very-helpful error message, postpone
     * complaining, expecting that the is-it-an-index test below will fail.
     */
    heapoid = IndexGetRelation(indexoid, true);
    if (OidIsValid(heapoid))
        heapRel = heap_open(heapoid, ShareUpdateExclusiveLock);
    else
        heapRel = NULL;

    indexRel = index_open(indexoid, ShareUpdateExclusiveLock);

    /* Must be a BRIN index */
    if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
        indexRel->rd_rel->relam != BRIN_AM_OID)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a BRIN index",
                        RelationGetRelationName(indexRel))));

    /* User must own the index (comparable to privileges needed for VACUUM) */
    if (!pg_class_ownercheck(indexoid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
                       RelationGetRelationName(indexRel));

    /*
     * Since we did the IndexGetRelation call above without any lock, it's
     * barely possible that a race against an index drop/recreation could
     * have netted us the wrong table.  Recheck.
     */
    if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_TABLE),
                 errmsg("could not open parent table of index %s",
                        RelationGetRelationName(indexRel))));

    /* OK, do it */
    brinsummarize(indexRel, heapRel, &numSummarized, NULL);

    relation_close(indexRel, ShareUpdateExclusiveLock);
    relation_close(heapRel, ShareUpdateExclusiveLock);

    PG_RETURN_INT32((int32) numSummarized);
}
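/*
 * Usage sketch for the function above: from SQL it is exposed as
 * brin_summarize_new_values(regclass) and returns the number of page ranges
 * it summarized.  For example (where "brin_idx" is a hypothetical BRIN
 * index name):
 *
 *		SELECT brin_summarize_new_values('brin_idx'::regclass);
 */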
/*
 * This routine is in charge of "vacuuming" a BRIN index: we just summarize
 * ranges that are currently unsummarized.
 */
IndexBulkDeleteResult *
brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
    Relation    heapRel;

    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    if (!stats)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    stats->num_pages = RelationGetNumberOfBlocks(info->index);
    /* rest of stats is initialized by zeroing */

    heapRel = heap_open(IndexGetRelation(RelationGetRelid(info->index), false),
                        AccessShareLock);

    brin_vacuum_scan(info->index, info->strategy);

    brinsummarize(info->index, heapRel,
                  &stats->num_index_tuples, &stats->num_index_tuples);

    heap_close(heapRel, AccessShareLock);

    return stats;
}
/*
 * This routine is in charge of "vacuuming" a BRIN index: we just summarize
 * ranges that are currently unsummarized.
 */
Datum
brinvacuumcleanup(PG_FUNCTION_ARGS)
{
    IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
    Relation    heapRel;

    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        PG_RETURN_POINTER(stats);

    if (!stats)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    stats->num_pages = RelationGetNumberOfBlocks(info->index);
    /* rest of stats is initialized by zeroing */

    heapRel = heap_open(IndexGetRelation(RelationGetRelid(info->index), false),
                        AccessShareLock);

    brinsummarize(info->index, heapRel,
                  &stats->num_index_tuples, &stats->num_index_tuples);

    heap_close(heapRel, AccessShareLock);

    PG_RETURN_POINTER(stats);
}
/*
 * SQL-callable function to scan through an index and summarize all ranges
 * that are not currently summarized.
 */
Datum
brin_summarize_new_values(PG_FUNCTION_ARGS)
{
    Oid         indexoid = PG_GETARG_OID(0);
    Relation    indexRel;
    Relation    heapRel;
    double      numSummarized = 0;

    heapRel = heap_open(IndexGetRelation(indexoid, false),
                        ShareUpdateExclusiveLock);
    indexRel = index_open(indexoid, ShareUpdateExclusiveLock);

    brinsummarize(indexRel, heapRel, &numSummarized, NULL);

    relation_close(indexRel, ShareUpdateExclusiveLock);
    relation_close(heapRel, ShareUpdateExclusiveLock);

    PG_RETURN_INT32((int32) numSummarized);
}
/*
 * Execute the index scan.
 *
 * This works by reading index TIDs from the revmap, and obtaining the index
 * tuples pointed to by them; the summary values in the index tuples are
 * compared to the scan keys.  We return into the TID bitmap all the pages in
 * ranges corresponding to index tuples that match the scan keys.
 *
 * If a TID from the revmap is read as InvalidTID, we know that range is
 * unsummarized.  Pages in those ranges need to be returned regardless of
 * scan keys.
 */
int64
bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
    Relation    idxRel = scan->indexRelation;
    Buffer      buf = InvalidBuffer;
    BrinDesc   *bdesc;
    Oid         heapOid;
    Relation    heapRel;
    BrinOpaque *opaque;
    BlockNumber nblocks;
    BlockNumber heapBlk;
    int         totalpages = 0;
    FmgrInfo   *consistentFn;
    MemoryContext oldcxt;
    MemoryContext perRangeCxt;

    opaque = (BrinOpaque *) scan->opaque;
    bdesc = opaque->bo_bdesc;
    pgstat_count_index_scan(idxRel);

    /*
     * We need to know the size of the table so that we know how long to
     * iterate on the revmap.
     */
    heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
    heapRel = heap_open(heapOid, AccessShareLock);
    nblocks = RelationGetNumberOfBlocks(heapRel);
    heap_close(heapRel, AccessShareLock);

    /*
     * Make room for the consistent support procedures of indexed columns.
     * We don't look them up here; we do that lazily the first time we see a
     * scan key reference each of them.  We rely on zeroing fn_oid to
     * InvalidOid.
     */
    consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);

    /*
     * Setup and use a per-range memory context, which is reset every time we
     * loop below.  This avoids having to free the tuples within the loop.
     */
    perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
                                        "bringetbitmap cxt",
                                        ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(perRangeCxt);

    /*
     * Now scan the revmap.  We start by querying for heap page 0,
     * incrementing by the number of pages per range; this gives us a full
     * view of the table.
     */
    for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
    {
        bool        addrange;
        BrinTuple  *tup;
        OffsetNumber off;
        Size        size;

        CHECK_FOR_INTERRUPTS();

        MemoryContextResetAndDeleteChildren(perRangeCxt);

        tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
                                       &off, &size, BUFFER_LOCK_SHARE,
                                       scan->xs_snapshot);
        if (tup)
        {
            tup = brin_copy_tuple(tup, size);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        /*
         * For page ranges with no indexed tuple, we must return the whole
         * range; otherwise, compare it to the scan keys.
         */
        if (tup == NULL)
        {
            addrange = true;
        }
        else
        {
            BrinMemTuple *dtup;

            dtup = brin_deform_tuple(bdesc, tup);
            if (dtup->bt_placeholder)
            {
                /*
                 * Placeholder tuples are always returned, regardless of the
                 * values stored in them.
                 */
                addrange = true;
            }
            else
            {
                int         keyno;

                /*
                 * Compare scan keys with summary values stored for the
                 * range.  If scan keys are matched, the page range must be
                 * added to the bitmap.  We initially assume the range needs
                 * to be added; in particular this serves the case where
                 * there are no keys.
                 */
                addrange = true;
                for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
                {
                    ScanKey     key = &scan->keyData[keyno];
                    AttrNumber  keyattno = key->sk_attno;
                    BrinValues *bval = &dtup->bt_columns[keyattno - 1];
                    Datum       add;

                    /*
                     * The collation of the scan key must match the collation
                     * used in the index column (but only if the search is
                     * not IS NULL/ IS NOT NULL).  Otherwise we shouldn't be
                     * using this index ...
                     */
                    Assert((key->sk_flags & SK_ISNULL) ||
                           (key->sk_collation ==
                            bdesc->bd_tupdesc->attrs[keyattno - 1]->attcollation));

                    /* First time this column? look up consistent function */
                    if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
                    {
                        FmgrInfo   *tmp;

                        tmp = index_getprocinfo(idxRel, keyattno,
                                                BRIN_PROCNUM_CONSISTENT);
                        fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
                                       CurrentMemoryContext);
                    }

                    /*
                     * Check whether the scan key is consistent with the page
                     * range values; if so, have the pages in the range added
                     * to the output bitmap.
                     *
                     * When there are multiple scan keys, failure to meet the
                     * criteria for a single one of them is enough to discard
                     * the range as a whole, so break out of the loop as soon
                     * as a false return value is obtained.
                     */
                    add = FunctionCall3Coll(&consistentFn[keyattno - 1],
                                            key->sk_collation,
                                            PointerGetDatum(bdesc),
                                            PointerGetDatum(bval),
                                            PointerGetDatum(key));
                    addrange = DatumGetBool(add);
                    if (!addrange)
                        break;
                }
            }
        }

        /* add the pages in the range to the output bitmap, if needed */
        if (addrange)
        {
            BlockNumber pageno;

            for (pageno = heapBlk;
                 pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
                 pageno++)
            {
                MemoryContextSwitchTo(oldcxt);
                tbm_add_page(tbm, pageno);
                totalpages++;
                MemoryContextSwitchTo(perRangeCxt);
            }
        }
    }

    MemoryContextSwitchTo(oldcxt);
    MemoryContextDelete(perRangeCxt);

    if (buf != InvalidBuffer)
        ReleaseBuffer(buf);

    /*
     * XXX We have an approximation of the number of *pages* that our scan
     * returns, but we don't have a precise idea of the number of heap tuples
     * involved.
     */
    return totalpages * 10;
}
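/*
 * For reference, a minimal sketch of the "consistent" support procedure that
 * FunctionCall3Coll() invokes above.  It is modeled loosely on
 * brin_minmax_consistent; the function name and body here are illustrative
 * assumptions, not actual source.  The procedure receives the BrinDesc, the
 * summary values for one column of the range, and the scan key, and returns
 * whether the range might contain matching rows.
 */
static Datum
example_brin_consistent(PG_FUNCTION_ARGS)
{
    /* argument 0 is the BrinDesc; this sketch does not need it */
    BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
    ScanKey     key = (ScanKey) PG_GETARG_POINTER(2);

    /* IS NULL keys: match only if the range may contain nulls */
    if (key->sk_flags & SK_SEARCHNULL)
        PG_RETURN_BOOL(column->bv_allnulls || column->bv_hasnulls);

    /* IS NOT NULL keys: match unless the range is known all-null */
    if (key->sk_flags & SK_SEARCHNOTNULL)
        PG_RETURN_BOOL(!column->bv_allnulls);

    /* a range containing only nulls cannot satisfy an ordinary key */
    if (column->bv_allnulls)
        PG_RETURN_BOOL(false);

    /* ... compare key->sk_argument against column->bv_values[] here ... */
    PG_RETURN_BOOL(true);       /* conservative default: keep the range */
}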
/*
 * SQL-callable interface to mark a range as no longer summarized
 */
Datum
brin_desummarize_range(PG_FUNCTION_ARGS)
{
    Oid         indexoid = PG_GETARG_OID(0);
    int64       heapBlk64 = PG_GETARG_INT64(1);
    BlockNumber heapBlk;
    Oid         heapoid;
    Relation    heapRel;
    Relation    indexRel;
    bool        done;

    if (RecoveryInProgress())
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("recovery is in progress"),
                 errhint("BRIN control functions cannot be executed during recovery.")));

    if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
    {
        char       *blk = psprintf(INT64_FORMAT, heapBlk64);

        ereport(ERROR,
                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                 errmsg("block number out of range: %s", blk)));
    }
    heapBlk = (BlockNumber) heapBlk64;

    /*
     * We must lock table before index to avoid deadlocks.  However, if the
     * passed indexoid isn't an index then IndexGetRelation() will fail.
     * Rather than emitting a not-very-helpful error message, postpone
     * complaining, expecting that the is-it-an-index test below will fail.
     */
    heapoid = IndexGetRelation(indexoid, true);
    if (OidIsValid(heapoid))
        heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
    else
        heapRel = NULL;

    indexRel = index_open(indexoid, ShareUpdateExclusiveLock);

    /* Must be a BRIN index */
    if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
        indexRel->rd_rel->relam != BRIN_AM_OID)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a BRIN index",
                        RelationGetRelationName(indexRel))));

    /* User must own the index (comparable to privileges needed for VACUUM) */
    if (!pg_class_ownercheck(indexoid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
                       RelationGetRelationName(indexRel));

    /*
     * Since we did the IndexGetRelation call above without any lock, it's
     * barely possible that a race against an index drop/recreation could
     * have netted us the wrong table.  Recheck.
     */
    if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_TABLE),
                 errmsg("could not open parent table of index %s",
                        RelationGetRelationName(indexRel))));

    /* the revmap does the hard work */
    do
    {
        done = brinRevmapDesummarizeRange(indexRel, heapBlk);
    } while (!done);

    relation_close(indexRel, ShareUpdateExclusiveLock);
    relation_close(heapRel, ShareUpdateExclusiveLock);

    PG_RETURN_VOID();
}
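/*
 * Usage sketch for the function above: from SQL it is exposed as
 * brin_desummarize_range(regclass, bigint), taking the index and the block
 * number at which the target page range starts.  For example (where
 * "brin_idx" is a hypothetical BRIN index name):
 *
 *		SELECT brin_desummarize_range('brin_idx'::regclass, 0);
 */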
/*
 * index_drop
 *
 * NOTE: this routine should now only be called through performDeletion(),
 * else associated dependencies won't be cleaned up.
 */
void
index_drop(Oid indexId)
{
    Oid         heapId;
    Relation    userHeapRelation;
    Relation    userIndexRelation;
    Relation    indexRelation;
    HeapTuple   tuple;
    int         i;

    Assert(OidIsValid(indexId));

    /*
     * To drop an index safely, we must grab exclusive lock on its parent
     * table; otherwise there could be other backends using the index!
     * Exclusive lock on the index alone is insufficient because another
     * backend might be in the midst of devising a query plan that will use
     * the index.  The parser and planner take care to hold an appropriate
     * lock on the parent table while working, but having them hold locks on
     * all the indexes too seems overly complex.  We do grab exclusive lock
     * on the index too, just to be safe.  Both locks must be held till end
     * of transaction, else other backends will still see this index in
     * pg_index.
     */
    heapId = IndexGetRelation(indexId);
    userHeapRelation = heap_open(heapId, AccessExclusiveLock);

    userIndexRelation = index_open(indexId);
    LockRelation(userIndexRelation, AccessExclusiveLock);

    /*
     * fix RELATION relation
     */
    DeleteRelationTuple(indexId);

    /*
     * fix ATTRIBUTE relation
     */
    DeleteAttributeTuples(indexId);

    /*
     * fix INDEX relation
     */
    indexRelation = heap_openr(IndexRelationName, RowExclusiveLock);

    tuple = SearchSysCache(INDEXRELID,
                           ObjectIdGetDatum(indexId),
                           0, 0, 0);
    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "cache lookup failed for index %u", indexId);

    simple_heap_delete(indexRelation, &tuple->t_self);
    ReleaseSysCache(tuple);
    heap_close(indexRelation, RowExclusiveLock);

    /*
     * flush buffer cache and physically remove the file
     */
    i = FlushRelationBuffers(userIndexRelation, (BlockNumber) 0);
    if (i < 0)
        elog(ERROR, "FlushRelationBuffers returned %d", i);

    smgrunlink(DEFAULT_SMGR, userIndexRelation);

    /*
     * We are presently too lazy to attempt to compute the new correct value
     * of relhasindex (the next VACUUM will fix it if necessary).  So there
     * is no need to update the pg_class tuple for the owning relation.  But
     * we must send out a shared-cache-inval notice on the owning relation
     * to ensure other backends update their relcache lists of indexes.
     */
    CacheInvalidateRelcache(heapId);

    /*
     * Close rels, but keep locks
     */
    index_close(userIndexRelation);
    heap_close(userHeapRelation, NoLock);

    RelationForgetRelation(indexId);
}
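/*
 * As the NOTE above says, callers are expected to reach index_drop() through
 * the dependency machinery rather than calling it directly.  A sketch of
 * that path (the exact performDeletion() signature and the pg_class OID
 * constant vary across versions; this is illustrative only):
 */
static void
drop_index_via_dependencies(Oid indexId)
{
    ObjectAddress object;

    object.classId = RelationRelationId;    /* the index's pg_class entry */
    object.objectId = indexId;
    object.objectSubId = 0;

    performDeletion(&object, DROP_RESTRICT);
}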