/* * systable_beginscan_ordered --- set up for ordered catalog scan * * These routines have essentially the same API as systable_beginscan etc, * except that they guarantee to return multiple matching tuples in * index order. Also, for largely historical reasons, the index to use * is opened and locked by the caller, not here. * * Currently we do not support non-index-based scans here. (In principle * we could do a heapscan and sort, but the uses are in places that * probably don't need to still work with corrupted catalog indexes.) * For the moment, therefore, these functions are merely the thinnest of * wrappers around index_beginscan/index_getnext. The main reason for their * existence is to centralize possible future support of lossy operators * in catalog scans. */ SysScanDesc systable_beginscan_ordered(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key) { SysScanDesc sysscan; int i; /* REINDEX can probably be a hard error here ... */ if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed", RelationGetRelationName(indexRelation)); /* ... but we only throw a warning about violating IgnoreSystemIndexes */ if (IgnoreSystemIndexes) elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes", RelationGetRelationName(indexRelation)); sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); sysscan->heap_rel = heapRelation; sysscan->irel = indexRelation; if (snapshot == NULL) { Oid relid = RelationGetRelid(heapRelation); snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); sysscan->snapshot = snapshot; } else { /* Caller is responsible for any snapshot. */ sysscan->snapshot = NULL; } /* Change attribute numbers to be index column numbers. 
*/ for (i = 0; i < nkeys; i++) { int j; for (j = 0; j < indexRelation->rd_index->indnatts; j++) { if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j]) { key[i].sk_attno = j + 1; break; } } if (j == indexRelation->rd_index->indnatts) elog(ERROR, "column is not in index"); } sysscan->iscan = index_beginscan(heapRelation, indexRelation, snapshot, nkeys, 0); index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; return sysscan; }
/*
 * Create partitions and return the OID of the partition that contains value.
 *
 * On success returns the child partition's OID (InvalidOid if, even after
 * creating partitions, no range covers the value).  If the PL call fails,
 * *crashed (when non-NULL) is set to true and InvalidOid is returned.
 *
 * NOTE(review): the previous version executed "return 0;" from inside
 * PG_CATCH(), which skips PG_END_TRY() and leaves PG_exception_stack
 * pointing at a dead stack frame — a later elog(ERROR) would longjmp into
 * garbage.  It also dereferenced "crashed" unconditionally at the top while
 * NULL-checking it later.  Both are fixed here.
 */
Oid
create_partitions(Oid relid, Datum value, Oid value_type, bool *crashed)
{
	int			ret;
	RangeEntry *ranges;
	Datum		vals[2];
	Oid			oids[] = {OIDOID, value_type};
	bool		nulls[] = {false, false};
	char	   *sql;
	bool		found;
	bool		spi_failed = false;
	int			pos;
	PartRelationInfo *prel;
	RangeRelation *rangerel;
	FmgrInfo	cmp_func;
	char	   *schema;

	if (crashed != NULL)
		*crashed = false;

	schema = get_extension_schema();
	prel = get_pathman_relation_info(relid, NULL);
	rangerel = get_pathman_range_relation(relid, NULL);
	ranges = dsm_array_get_pointer(&rangerel->ranges);

	/* Comparison function for the value's type vs. the partitioned column */
	cmp_func = *get_cmp_func(value_type, prel->atttype);

	vals[0] = ObjectIdGetDatum(relid);
	vals[1] = value;

	/* Perform PL procedure */
	sql = psprintf("SELECT %s.append_partitions_on_demand_internal($1, $2)",
				   schema);
	PG_TRY();
	{
		ret = SPI_execute_with_args(sql, 2, oids, vals, nulls, false, 0);
		if (ret > 0)
		{
			/* Update relation info */
			free_dsm_array(&rangerel->ranges);
			free_dsm_array(&prel->children);
			load_check_constraints(relid, GetCatalogSnapshot(relid));
		}
	}
	PG_CATCH();
	{
		elog(WARNING, "Attempt to create new partitions failed");

		/* Discard the caught error so normal processing can continue. */
		FlushErrorState();

		if (crashed != NULL)
			*crashed = true;
		/* Don't return here: PG_END_TRY() must restore the exception stack */
		spi_failed = true;
	}
	PG_END_TRY();

	if (spi_failed)
		return InvalidOid;

	/* Repeat binary search over the (possibly refreshed) range array */
	ranges = dsm_array_get_pointer(&rangerel->ranges);
	pos = range_binary_search(rangerel, &cmp_func, value, &found);
	if (found)
		return ranges[pos].child_oid;

	return InvalidOid;
}
/*
 * table_beginscan_catalog --- start a table scan of a catalog relation
 *
 * Registers a fresh catalog snapshot and hands it to the table AM's
 * scan_begin callback.
 */
TableScanDesc
table_beginscan_catalog(Relation relation, int nkeys,
						struct ScanKeyData *key)
{
	Snapshot	snap;

	snap = RegisterSnapshot(GetCatalogSnapshot(RelationGetRelid(relation)));

	/*
	 * NOTE(review): the trailing booleans are the AM's scan flags; the final
	 * "true" presumably marks the snapshot as scan-owned so it is
	 * unregistered at endscan — confirm against the tableam definition.
	 */
	return relation->rd_tableam->scan_begin(relation, snap, nkeys, key,
											NULL, true, true, true,
											false, false, true);
}
/*
 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
 *
 * Determines whether this tuple would be visible to a catalog scan started
 * now.  Non-MVCC scan snapshots are not handled, because no caller needs
 * that yet.
 *
 * Useful to test whether an object was deleted while we waited to acquire
 * lock on it.
 *
 * Note: the tuple isn't strictly *needed* as an argument, but it's a good
 * crosscheck that the caller is interested in the right tuple.
 */
bool
systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
{
	Snapshot	snap;
	bool		visible;

	/*
	 * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves
	 * acquire snapshots, so we need not register this one.  Those facilities
	 * are too low-level to have any business scanning tables.
	 */
	snap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));

	if (sysscan->irel)
	{
		/* Index scan case: tuple lives in the index scan's heap buffer. */
		IndexScanDesc iscan = sysscan->iscan;

		Assert(IsMVCCSnapshot(iscan->xs_snapshot));
		Assert(tup == &iscan->xs_ctup);
		Assert(BufferIsValid(iscan->xs_cbuf));
		/* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
		LockBuffer(iscan->xs_cbuf, BUFFER_LOCK_SHARE);
		visible = HeapTupleSatisfiesVisibility(tup, snap, iscan->xs_cbuf);
		LockBuffer(iscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
	}
	else
	{
		/* Plain heap scan case. */
		HeapScanDesc hscan = sysscan->scan;

		Assert(IsMVCCSnapshot(hscan->rs_snapshot));
		Assert(tup == &hscan->rs_ctup);
		Assert(BufferIsValid(hscan->rs_cbuf));
		/* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
		visible = HeapTupleSatisfiesVisibility(tup, snap, hscan->rs_cbuf);
		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
	}

	return visible;
}
/*
 * Load GUC settings from pg_db_role_setting.
 *
 * We try specific settings for the database/role combination, as well as
 * settings general to this database and to this user.
 */
static void
process_settings(Oid databaseid, Oid roleid)
{
	Snapshot	snap;
	Relation	rel;

	if (!IsUnderPostmaster)
		return;

	rel = heap_open(DbRoleSettingRelationId, AccessShareLock);

	/* Read all the settings under the same snapshot for efficiency. */
	snap = RegisterSnapshot(GetCatalogSnapshot(DbRoleSettingRelationId));

	/* Later settings are ignored if set earlier. */
	ApplySetting(snap, databaseid, roleid, rel, PGC_S_DATABASE_USER);
	ApplySetting(snap, InvalidOid, roleid, rel, PGC_S_USER);
	ApplySetting(snap, databaseid, InvalidOid, rel, PGC_S_DATABASE);
	ApplySetting(snap, InvalidOid, InvalidOid, rel, PGC_S_GLOBAL);

	UnregisterSnapshot(snap);
	heap_close(rel, AccessShareLock);
}
/* * systable_beginscan --- set up for heap-or-index scan * * rel: catalog to scan, already opened and suitably locked * indexId: OID of index to conditionally use * indexOK: if false, forces a heap scan (see notes below) * snapshot: time qual to use (NULL for a recent catalog snapshot) * nkeys, key: scan keys * * The attribute numbers in the scan key should be set for the heap case. * If we choose to index, we reset them to 1..n to reference the index * columns. Note this means there must be one scankey qualification per * index column! This is checked by the Asserts in the normal, index-using * case, but won't be checked if the heapscan path is taken. * * The routine checks the normal cases for whether an indexscan is safe, * but caller can make additional checks and pass indexOK=false if needed. * In standard case indexOK can simply be constant TRUE. */ SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key) { SysScanDesc sysscan; Relation irel; if (indexOK && !IgnoreSystemIndexes && !ReindexIsProcessingIndex(indexId)) irel = index_open(indexId, AccessShareLock); else irel = NULL; sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); sysscan->heap_rel = heapRelation; sysscan->irel = irel; if (snapshot == NULL) { Oid relid = RelationGetRelid(heapRelation); snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); sysscan->snapshot = snapshot; } else { /* Caller is responsible for any snapshot. */ sysscan->snapshot = NULL; } if (irel) { int i; /* Change attribute numbers to be index column numbers. 
*/ for (i = 0; i < nkeys; i++) { int j; for (j = 0; j < irel->rd_index->indnatts; j++) { if (key[i].sk_attno == irel->rd_index->indkey.values[j]) { key[i].sk_attno = j + 1; break; } } if (j == irel->rd_index->indnatts) elog(ERROR, "column is not in index"); } sysscan->iscan = index_beginscan(heapRelation, irel, snapshot, nkeys, 0); index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; } else { /* * We disallow synchronized scans when forced to use a heapscan on a * catalog. In most cases the desired rows are near the front, so * that the unpredictable start point of a syncscan is a serious * disadvantage; and there are no compensating advantages, because * it's unlikely that such scans will occur in parallel. */ sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, nkeys, key, true, false); sysscan->iscan = NULL; } return sysscan; }
/*
 * Performs a compaction of an append-only relation in column-orientation.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.  If
 * set, the insert_segno should also be marked as in-use.
 * When the insert segno is negative, only truncate to eof operations
 * can be executed.
 *
 * NOTE(review): the paragraph above mentions a negative insert_segno, but
 * this function Asserts insert_segno >= 0 below — confirm whether the
 * negative-segno path was removed or the Assert is too strict.
 *
 * The caller is required to hold either an AccessExclusiveLock (vacuum full)
 * or a ShareLock on the relation.
 */
void
AOCSCompact(Relation aorel,
			List *compaction_segno,
			int insert_segno,
			bool isFull)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	AOCSInsertDesc insertDesc = NULL;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	/* Snapshot for reading AO metadata; unregistered at function exit. */
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(RelationIsAoCols(aorel));
	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(insert_segno >= 0);

	relname = RelationGetRelationName(aorel);
	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot,
										  &total_segfiles);

	if (insert_segno >= 0)
	{
		/* Set up the insert target all compacted tuples are moved into. */
		insertDesc = aocs_insert_init(aorel, insert_segno, false);
	}

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;
		/* Only segments the caller listed for compaction are processed. */
		if (!list_member_int(compaction_segno, segno))
		{
			continue;
		}
		if (segno == insert_segno)
		{
			/* We cannot compact the segment file we are inserting to. */
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														  /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			/* Someone else is using this segfile; skip it, don't block. */
			elog(DEBUG5, "compaction skips AOCS segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		/* Compact only when the segment's hidden-tuple ratio warrants it. */
		if (AppendOnlyCompaction_ShouldCompact(aorel,
											   fsinfo->segno,
											   fsinfo->total_tupcount,
											   isFull,
											   appendOnlyMetaDataSnapshot))
		{
			AOCSSegmentFileFullCompaction(aorel, insertDesc, fsinfo,
										  appendOnlyMetaDataSnapshot);
		}

		pfree(fsinfo);
	}

	if (insertDesc != NULL)
		aocs_insert_finish(insertDesc);

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}

	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}
/*
 * Drops segment files of an append-only AOCS relation that are awaiting
 * drop after compaction.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.
 *
 * NOTE(review): added the fsinfo NULL check that the sibling functions
 * AOCSCompact and AOCSTruncateToEOF already perform; previously a missing
 * seginfo row (e.g. due to index corruption) would have caused a NULL
 * pointer dereference here.
 */
void
AOCSDrop(Relation aorel,
		 List *compaction_segno)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	/* Snapshot for reading AO metadata; unregistered at function exit. */
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);
	elogif(Debug_appendonly_print_compaction, LOG,
		   "Drop AOCS relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot,
										  &total_segfiles);

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;
		/* Only segments the caller listed are considered. */
		if (!list_member_int(compaction_segno, segno))
		{
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														  /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			/* Someone else is using this segfile; skip it, don't block. */
			elog(DEBUG5, "drop skips AOCS segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		if (fsinfo->state == AOSEG_STATE_AWAITING_DROP)
		{
			/* Physically remove the segfile and reset its catalog entry. */
			Assert(HasLockForSegmentFileDrop(aorel));
			AOCSCompaction_DropSegmentFile(aorel, segno);
			ClearAOCSFileSegInfo(aorel, segno, AOSEG_STATE_DEFAULT);
		}
		pfree(fsinfo);
	}

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}

	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}
/*
 * Truncates each segment file of the AOCS relation to its EOF.
 * If we cannot get a lock on a segment file (because of e.g. a concurrent
 * insert), that segment file is skipped.
 *
 * NOTE(review): the debug message previously said "Compact AO relation" —
 * an apparent copy-paste from AOCSCompact; corrected to say "Truncate".
 */
void
AOCSTruncateToEOF(Relation aorel)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	/* Snapshot for reading AO metadata; unregistered at function exit. */
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);
	elogif(Debug_appendonly_print_compaction, LOG,
		   "Truncate AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot,
										  &total_segfiles);

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														  /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			/* Someone else is using this segfile; skip it, don't block. */
			elog(DEBUG5, "truncate skips AO segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		AOCSSegmentFileTruncateToEOF(aorel, fsinfo);
		pfree(fsinfo);
	}

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}

	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}