/*
 * CreateQueryDesc
 *
 * Allocate a QueryDesc with palloc and populate it from a planned statement
 * plus the caller-supplied execution context.
 *
 * Both "snapshot" and "crosscheck_snapshot" are passed through
 * RegisterSnapshot(); the descriptor stores whatever that call returns.
 * The tupDesc, estate, planstate and totaltime fields start out NULL and
 * stay that way until ExecutorStart sets them.
 */
QueryDesc *
CreateQueryDesc(PlannedStmt *plannedstmt,
				const char *sourceText,
				Snapshot snapshot,
				Snapshot crosscheck_snapshot,
				DestReceiver *dest,
				ParamListInfo params,
				int instrument_options)
{
	QueryDesc  *desc;

	desc = (QueryDesc *) palloc(sizeof(QueryDesc));

	/* what to run: the plan, its command type, and the query text */
	desc->plannedstmt = plannedstmt;
	desc->operation = plannedstmt->commandType;
	desc->utilitystmt = plannedstmt->utilityStmt;	/* in case DECLARE CURSOR */
	desc->sourceText = sourceText;

	/* main snapshot first, then the RI check snapshot */
	desc->snapshot = RegisterSnapshot(snapshot);
	desc->crosscheck_snapshot = RegisterSnapshot(crosscheck_snapshot);

	/* output destination, parameter values, requested instrumentation */
	desc->dest = dest;
	desc->params = params;
	desc->instrument_options = instrument_options;

	/* filled in later by ExecutorStart */
	desc->tupDesc = NULL;
	desc->estate = NULL;
	desc->planstate = NULL;
	desc->totaltime = NULL;

	return desc;
}
/*
 * CreateEState
 *
 * Build a fresh executor state via CreateExecutorState() and seed it from
 * the given query descriptor: parameter list, instrumentation options,
 * range table and the is_continuous flag of the planned statement.
 *
 * Both snapshots of the descriptor are passed through RegisterSnapshot()
 * again, and the state keeps the returned values.  An es_param_exec_vals
 * array is only allocated (zero-filled) when the plan has nParamExec > 0.
 * EXEC_FLAG_SKIP_TRIGGERS is always OR'ed into es_top_eflags.
 */
EState *
CreateEState(QueryDesc *query_desc)
{
	EState	   *estate = CreateExecutorState();

	/* snapshots: regular one first, then the crosscheck snapshot */
	estate->es_snapshot = RegisterSnapshot(query_desc->snapshot);
	estate->es_crosscheck_snapshot =
		RegisterSnapshot(query_desc->crosscheck_snapshot);

	/* values copied straight from the descriptor and its plan */
	estate->es_param_list_info = query_desc->params;
	estate->es_instrument = query_desc->instrument_options;
	estate->es_range_table = query_desc->plannedstmt->rtable;
	estate->es_continuous = query_desc->plannedstmt->is_continuous;

	/* counters and last-OID start from a clean slate */
	estate->es_lastoid = InvalidOid;
	estate->es_filtered = 0;
	estate->es_processed = 0;

	if (query_desc->plannedstmt->nParamExec > 0)
	{
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(query_desc->plannedstmt->nParamExec * sizeof(ParamExecData));
	}

	estate->es_top_eflags |= EXEC_FLAG_SKIP_TRIGGERS;

	return estate;
}
Datum currtid_byrelname(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); ItemPointer tid = PG_GETARG_ITEMPOINTER(1); ItemPointer result; RangeVar *relrv; Relation rel; AclResult aclresult; Snapshot snapshot; relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = heap_openrv(relrv, AccessShareLock); aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(), ACL_SELECT); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, ACL_KIND_CLASS, RelationGetRelationName(rel)); if (rel->rd_rel->relkind == RELKIND_VIEW || rel->rd_rel->relkind == RELKIND_CONTVIEW) return currtid_for_view(rel, tid); result = (ItemPointer) palloc(sizeof(ItemPointerData)); ItemPointerCopy(tid, result); snapshot = RegisterSnapshot(GetLatestSnapshot()); heap_get_latest_tid(rel, snapshot, result); UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); PG_RETURN_ITEMPOINTER(result); }
/* * systable_beginscan_ordered --- set up for ordered catalog scan * * These routines have essentially the same API as systable_beginscan etc, * except that they guarantee to return multiple matching tuples in * index order. Also, for largely historical reasons, the index to use * is opened and locked by the caller, not here. * * Currently we do not support non-index-based scans here. (In principle * we could do a heapscan and sort, but the uses are in places that * probably don't need to still work with corrupted catalog indexes.) * For the moment, therefore, these functions are merely the thinnest of * wrappers around index_beginscan/index_getnext. The main reason for their * existence is to centralize possible future support of lossy operators * in catalog scans. */ SysScanDesc systable_beginscan_ordered(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key) { SysScanDesc sysscan; int i; /* REINDEX can probably be a hard error here ... */ if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed", RelationGetRelationName(indexRelation)); /* ... but we only throw a warning about violating IgnoreSystemIndexes */ if (IgnoreSystemIndexes) elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes", RelationGetRelationName(indexRelation)); sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); sysscan->heap_rel = heapRelation; sysscan->irel = indexRelation; if (snapshot == NULL) { Oid relid = RelationGetRelid(heapRelation); snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); sysscan->snapshot = snapshot; } else { /* Caller is responsible for any snapshot. */ sysscan->snapshot = NULL; } /* Change attribute numbers to be index column numbers. 
*/ for (i = 0; i < nkeys; i++) { int j; for (j = 0; j < indexRelation->rd_index->indnatts; j++) { if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j]) { key[i].sk_attno = j + 1; break; } } if (j == indexRelation->rd_index->indnatts) elog(ERROR, "column is not in index"); } sysscan->iscan = index_beginscan(heapRelation, indexRelation, snapshot, nkeys, 0); index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; return sysscan; }
/*
 * CreateUtilityQueryDesc
 *
 * Allocate a QueryDesc with palloc for a utility command.  The operation is
 * always CMD_UTILITY, there is no planned statement, the crosscheck
 * snapshot is InvalidSnapshot and instrumentation is switched off.
 *
 * "snapshot" is passed through RegisterSnapshot() and the descriptor keeps
 * the returned value.  The tupDesc, estate, planstate and totaltime fields
 * start out NULL and stay that way until ExecutorStart sets them.
 */
QueryDesc *
CreateUtilityQueryDesc(Node *utilitystmt,
					   const char *sourceText,
					   Snapshot snapshot,
					   DestReceiver *dest,
					   ParamListInfo params)
{
	QueryDesc  *desc;

	desc = (QueryDesc *) palloc(sizeof(QueryDesc));

	/* what to run: a utility command and its query text, no plan */
	desc->operation = CMD_UTILITY;
	desc->utilitystmt = utilitystmt;
	desc->plannedstmt = NULL;
	desc->sourceText = sourceText;

	/* only the main snapshot is registered; no RI check snapshot */
	desc->snapshot = RegisterSnapshot(snapshot);
	desc->crosscheck_snapshot = InvalidSnapshot;

	/* output destination and parameter values passed into the query */
	desc->dest = dest;
	desc->params = params;

	/* instrumentation is uninteresting for utilities */
	desc->instrument_options = false;

	/* filled in later by ExecutorStart */
	desc->tupDesc = NULL;
	desc->estate = NULL;
	desc->planstate = NULL;
	desc->totaltime = NULL;

	return desc;
}
/*
 * table_beginscan_parallel
 *
 * Start a scan of "relation" driven by the shared parallel scan descriptor,
 * by calling the relation's table AM scan_begin callback.
 *
 * When phs_snapshot_any is set, SnapshotAny is used as is.  Otherwise the
 * snapshot stored at offset phs_snapshot_off inside the parallel descriptor
 * is restored and registered, and the final scan_begin argument is true.
 * (Presumably that argument is the "temporary snapshot" flag telling the
 * scan to unregister the snapshot when it ends -- confirm against the
 * table AM callback signature.)
 */
TableScanDesc
table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
{
	Snapshot	scan_snapshot;

	Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);

	if (parallel_scan->phs_snapshot_any)
	{
		/* SnapshotAny passed by caller (not serialized) */
		scan_snapshot = SnapshotAny;
	}
	else
	{
		/* Snapshot was serialized -- restore it */
		scan_snapshot = RestoreSnapshot((char *) parallel_scan +
										parallel_scan->phs_snapshot_off);
		RegisterSnapshot(scan_snapshot);
	}

	return relation->rd_tableam->scan_begin(relation,
											scan_snapshot,
											0,
											NULL,
											parallel_scan,
											true,
											true,
											true,
											false,
											false,
											!parallel_scan->phs_snapshot_any);
}
/*
 * table_beginscan_catalog
 *
 * Start a scan of "relation" using a catalog snapshot obtained for that
 * relation's OID.  The snapshot is registered here before being handed,
 * together with the scan keys, to the table AM scan_begin callback.
 */
TableScanDesc
table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
{
	Snapshot	catalog_snapshot;

	catalog_snapshot = RegisterSnapshot(GetCatalogSnapshot(RelationGetRelid(relation)));

	return relation->rd_tableam->scan_begin(relation,
											catalog_snapshot,
											nkeys,
											key,
											NULL,
											true,
											true,
											true,
											false,
											false,
											true);
}
/*
 * table_scan_update_snapshot
 *
 * Switch an existing scan over to "snapshot", which must be an MVCC
 * snapshot (asserted).  The snapshot is registered here and the scan is
 * flagged via rs_temp_snap accordingly.
 */
void
table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
{
	Assert(IsMVCCSnapshot(snapshot));

	/* register first, so the scan never points at an unregistered snapshot */
	RegisterSnapshot(snapshot);

	scan->rs_temp_snap = true;
	scan->rs_snapshot = snapshot;
}
/* * Inits the visimap store. * The store is ready for usage after this function call. * * Assumes a zero-allocated visimap store data structure. * Assumes that the visimap memory context is active. */ void AppendOnlyVisimapStore_Init(AppendOnlyVisimapStore *visiMapStore, Oid visimapRelid, Oid visimapIdxid, LOCKMODE lockmode, Snapshot snapshot, MemoryContext memoryContext) { TupleDesc heapTupleDesc; ScanKey scanKey; Assert(visiMapStore); Assert(CurrentMemoryContext == memoryContext); Assert(OidIsValid(visimapRelid)); Assert(OidIsValid(visimapIdxid)); visiMapStore->snapshot = RegisterSnapshot(snapshot); visiMapStore->memoryContext = memoryContext; visiMapStore->visimapRelation = heap_open( visimapRelid, lockmode); visiMapStore->visimapIndex = index_open( visimapIdxid, lockmode); heapTupleDesc = RelationGetDescr(visiMapStore->visimapRelation); Assert(heapTupleDesc->natts == Natts_pg_aovisimap); visiMapStore->scanKeys = palloc0(sizeof(ScanKeyData) * APPENDONLY_VISIMAP_INDEX_SCAN_KEY_NUM); /* scan key: segno */ scanKey = visiMapStore->scanKeys; ScanKeyInit(scanKey, Anum_pg_aovisimap_segno, /* segno */ BTEqualStrategyNumber, F_INT4EQ, 0); /* scan key: firstRowNum */ scanKey++; ScanKeyInit(scanKey, Anum_pg_aovisimap_firstrownum, /* attribute number to scan */ BTEqualStrategyNumber, /* strategy */ F_INT8EQ, /* reg proc to use */ 0); }
/*
 * index_beginscan_parallel - join parallel index scan
 *
 * The snapshot serialized in the parallel scan descriptor is restored and
 * registered here, then used for the scan.
 *
 * Caller must be holding suitable locks on the heap and the index.
 */
IndexScanDesc
index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys,
						 int norderbys, ParallelIndexScanDesc pscan)
{
	IndexScanDesc desc;
	Snapshot	restored;

	Assert(RelationGetRelid(heaprel) == pscan->ps_relid);

	restored = RestoreSnapshot(pscan->ps_snapshot_data);
	RegisterSnapshot(restored);

	desc = index_beginscan_internal(indexrel, nkeys, norderbys, restored,
									pscan, true);

	/*
	 * index_beginscan_internal set up everything else; only the snapshot and
	 * the heap relation remain to be saved into the scandesc.
	 */
	desc->xs_snapshot = restored;
	desc->heapRelation = heaprel;

	return desc;
}
Datum currtid_byreloid(PG_FUNCTION_ARGS) { Oid reloid = PG_GETARG_OID(0); ItemPointer tid = PG_GETARG_ITEMPOINTER(1); ItemPointer result; Relation rel; AclResult aclresult; Snapshot snapshot; result = (ItemPointer) palloc(sizeof(ItemPointerData)); if (!reloid) { *result = Current_last_tid; PG_RETURN_ITEMPOINTER(result); } rel = heap_open(reloid, AccessShareLock); aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(), ACL_SELECT); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, ACL_KIND_CLASS, RelationGetRelationName(rel)); if (rel->rd_rel->relkind == RELKIND_VIEW || rel->rd_rel->relkind == RELKIND_CONTVIEW) return currtid_for_view(rel, tid); ItemPointerCopy(tid, result); snapshot = RegisterSnapshot(GetLatestSnapshot()); heap_get_latest_tid(rel, snapshot, result); UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); PG_RETURN_ITEMPOINTER(result); }
/*
 * Load GUC settings from pg_db_role_setting.
 *
 * Settings are applied for the specific database/role combination, then
 * for the role alone, then for the database alone, and finally the global
 * ones (neither database nor role).
 *
 * Does nothing when not running under the postmaster.
 */
static void
process_settings(Oid databaseid, Oid roleid)
{
	Relation	settings_rel;
	Snapshot	catalog_snap;

	if (!IsUnderPostmaster)
		return;

	settings_rel = heap_open(DbRoleSettingRelationId, AccessShareLock);

	/* read all the settings under the same snapshot for efficiency */
	catalog_snap = RegisterSnapshot(GetCatalogSnapshot(DbRoleSettingRelationId));

	/*
	 * Most specific source first: later settings are ignored if set
	 * earlier.
	 */
	ApplySetting(catalog_snap, databaseid, roleid,
				 settings_rel, PGC_S_DATABASE_USER);
	ApplySetting(catalog_snap, InvalidOid, roleid,
				 settings_rel, PGC_S_USER);
	ApplySetting(catalog_snap, databaseid, InvalidOid,
				 settings_rel, PGC_S_DATABASE);
	ApplySetting(catalog_snap, InvalidOid, InvalidOid,
				 settings_rel, PGC_S_GLOBAL);

	UnregisterSnapshot(catalog_snap);
	heap_close(settings_rel, AccessShareLock);
}
/* * systable_beginscan --- set up for heap-or-index scan * * rel: catalog to scan, already opened and suitably locked * indexId: OID of index to conditionally use * indexOK: if false, forces a heap scan (see notes below) * snapshot: time qual to use (NULL for a recent catalog snapshot) * nkeys, key: scan keys * * The attribute numbers in the scan key should be set for the heap case. * If we choose to index, we reset them to 1..n to reference the index * columns. Note this means there must be one scankey qualification per * index column! This is checked by the Asserts in the normal, index-using * case, but won't be checked if the heapscan path is taken. * * The routine checks the normal cases for whether an indexscan is safe, * but caller can make additional checks and pass indexOK=false if needed. * In standard case indexOK can simply be constant TRUE. */ SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key) { SysScanDesc sysscan; Relation irel; if (indexOK && !IgnoreSystemIndexes && !ReindexIsProcessingIndex(indexId)) irel = index_open(indexId, AccessShareLock); else irel = NULL; sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); sysscan->heap_rel = heapRelation; sysscan->irel = irel; if (snapshot == NULL) { Oid relid = RelationGetRelid(heapRelation); snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); sysscan->snapshot = snapshot; } else { /* Caller is responsible for any snapshot. */ sysscan->snapshot = NULL; } if (irel) { int i; /* Change attribute numbers to be index column numbers. 
*/ for (i = 0; i < nkeys; i++) { int j; for (j = 0; j < irel->rd_index->indnatts; j++) { if (key[i].sk_attno == irel->rd_index->indkey.values[j]) { key[i].sk_attno = j + 1; break; } } if (j == irel->rd_index->indnatts) elog(ERROR, "column is not in index"); } sysscan->iscan = index_beginscan(heapRelation, irel, snapshot, nkeys, 0); index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; } else { /* * We disallow synchronized scans when forced to use a heapscan on a * catalog. In most cases the desired rows are near the front, so * that the unpredictable start point of a syncscan is a serious * disadvantage; and there are no compensating advantages, because * it's unlikely that such scans will occur in parallel. */ sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, nkeys, key, true, false); sysscan->iscan = NULL; } return sysscan; }
/*
 * Performs a compaction of an append-only relation in column-orientation.
 *
 * Every segment file listed in compaction_segno, except the one currently
 * used for inserts, is locked (without waiting), re-read, and compacted
 * into the insert segment if AppendOnlyCompaction_ShouldCompact() says so.
 * Segment files whose lock is not immediately available are skipped.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code. If
 * set, the insert_segno should also be marked as in-use.
 * When the insert segno is negative, only truncate to eof operations
 * can be executed.
 *
 * NOTE(review): the function asserts insert_segno >= 0 and then tests the
 * same condition again before opening the insert descriptor; the test only
 * matters in builds without assertions.
 *
 * The caller is required to hold either an AccessExclusiveLock (vacuum full)
 * or a ShareLock on the relation.
 */
void
AOCSCompact(Relation aorel,
			List *compaction_segno,
			int insert_segno,
			bool isFull)
{
	const char *relname;
	int			nsegs;
	AOCSFileSegInfo **seginfos;
	AOCSInsertDesc writer = NULL;
	int			idx;
	Snapshot	metaSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(RelationIsAoCols(aorel));
	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(insert_segno >= 0);

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	seginfos = GetAllAOCSFileSegInfo(aorel, metaSnapshot, &nsegs);

	if (insert_segno >= 0)
		writer = aocs_insert_init(aorel, insert_segno, false);

	for (idx = 0; idx < nsegs; idx++)
	{
		int			cur_segno = seginfos[idx]->segno;
		LockAcquireResult lockres;
		AOCSFileSegInfo *latest;

		/*
		 * Skip segment files that were not requested, and the one we are
		 * inserting to, which cannot be compacted.
		 */
		if (!list_member_int(compaction_segno, cur_segno) ||
			cur_segno == insert_segno)
			continue;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		lockres = LockRelationAppendOnlySegmentFile(&aorel->rd_node,
													cur_segno,
													AccessExclusiveLock,
													 /* dontWait */ true);
		if (lockres == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "compaction skips AOCS segfile %d, "
				 "relation %s", cur_segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		latest = GetAOCSFileSegInfo(aorel, metaSnapshot, cur_segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (latest == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 cur_segno);

		if (AppendOnlyCompaction_ShouldCompact(aorel,
											   latest->segno,
											   latest->total_tupcount,
											   isFull,
											   metaSnapshot))
		{
			AOCSSegmentFileFullCompaction(aorel, writer, latest,
										  metaSnapshot);
		}

		pfree(latest);
	}

	if (writer != NULL)
		aocs_insert_finish(writer);

	if (seginfos != NULL)
	{
		FreeAllAOCSSegFileInfo(seginfos, nsegs);
		pfree(seginfos);
	}

	UnregisterSnapshot(metaSnapshot);
}
/*
 * Drops the segment files of an append-only column-oriented (AOCS)
 * relation that are awaiting drop.
 *
 * Only segment files listed in compaction_segno are considered.  For each
 * of them the segment file lock is requested without waiting; if it is not
 * available the segment file is skipped.  Otherwise the segment file info
 * is re-read, and if its state is AOSEG_STATE_AWAITING_DROP the file is
 * dropped and its state is reset to AOSEG_STATE_DEFAULT.
 *
 * Raises an ERROR if the segment file info cannot be re-read, as the
 * sibling compaction and truncate routines do.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.
 */
void
AOCSDrop(Relation aorel,
		 List *compaction_segno)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Drop AOCS relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, &total_segfiles);

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;

		/* Only segment files requested by the caller are candidates. */
		if (!list_member_int(compaction_segno, segno))
		{
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														   /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "drop skips AOCS segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?) than to dereference a NULL pointer below.
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		if (fsinfo->state == AOSEG_STATE_AWAITING_DROP)
		{
			Assert(HasLockForSegmentFileDrop(aorel));
			AOCSCompaction_DropSegmentFile(aorel, segno);
			ClearAOCSFileSegInfo(aorel, segno, AOSEG_STATE_DEFAULT);
		}
		pfree(fsinfo);
	}

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}

	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}
/*
 * Truncates each segment file of the AOCS relation to its EOF.
 * If we cannot get a lock on the segment file (because e.g. a concurrent
 * insert) the segment file is skipped.
 *
 * NOTE(review): the debug log line below says "Compact AO relation" even
 * though this routine truncates; the text is kept as is.
 */
void
AOCSTruncateToEOF(Relation aorel)
{
	const char *relname;
	int			nsegs;
	AOCSFileSegInfo **seginfos;
	int			idx;
	Snapshot	metaSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	seginfos = GetAllAOCSFileSegInfo(aorel, metaSnapshot, &nsegs);

	for (idx = 0; idx < nsegs; idx++)
	{
		int			cur_segno = seginfos[idx]->segno;
		LockAcquireResult lockres;
		AOCSFileSegInfo *latest;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		lockres = LockRelationAppendOnlySegmentFile(&aorel->rd_node,
													cur_segno,
													AccessExclusiveLock,
													 /* dontWait */ true);
		if (lockres == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "truncate skips AO segfile %d, "
				 "relation %s", cur_segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		latest = GetAOCSFileSegInfo(aorel, metaSnapshot, cur_segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (latest == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 cur_segno);

		AOCSSegmentFileTruncateToEOF(aorel, latest);
		pfree(latest);
	}

	if (seginfos != NULL)
	{
		FreeAllAOCSSegFileInfo(seginfos, nsegs);
		pfree(seginfos);
	}

	UnregisterSnapshot(metaSnapshot);
}