/*
 * Fills in the relation statistics for an append-only relation.
 *
 * This information is used to update the reltuples and relpages information
 * in pg_class. reltuples starts from the "pg_aoseg_<oid>:tupcount" total and
 * is reduced by the number of tuples the visibility map reports as hidden;
 * relpages is simulated by subdividing the eof value ("pg_aoseg_<oid>:eof")
 * over the defined page size.
 *
 * Parameters:
 *	aorel		- append-only relation (row- or column-oriented)
 *	snapshot	- snapshot used to read the segment totals and the visimap
 *	rel_pages	- out: simulated page count
 *	rel_tuples	- out: tuple count after subtracting hidden tuples
 *	relhasindex	- out: copy of the relation's pg_class relhasindex flag
 */
void
vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot,
							 BlockNumber *rel_pages, double *rel_tuples,
							 bool *relhasindex)
{
	FileSegTotals *fstotal;
	BlockNumber nblocks;
	char	   *relname;
	double		num_tuples;
	double		totalbytes;
	int64		hidden_tupcount;
	AppendOnlyVisimap visimap;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	/* get updated statistics from the pg_aoseg table */
	if (RelationIsAoRows(aorel))
	{
		fstotal = GetSegFilesTotals(aorel, snapshot);
	}
	else
	{
		Assert(RelationIsAoCols(aorel));
		fstotal = GetAOCSSSegFilesTotals(aorel, snapshot);
	}

	/* calculate the values we care about */
	totalbytes = (double) fstotal->totalbytes;
	num_tuples = (double) fstotal->totaltuples;
	nblocks = (uint32) RelationGuessNumberOfBlocks(totalbytes);

	/* Discount the tuples that the visibility map marks as hidden */
	AppendOnlyVisimap_Init(&visimap,
						   aorel->rd_appendonly->visimaprelid,
						   aorel->rd_appendonly->visimapidxid,
						   AccessShareLock,
						   snapshot);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	num_tuples -= hidden_tupcount;
	Assert(num_tuples > -1.0);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	/* nblocks is an unsigned BlockNumber, so it is printed with %u */
	elogif (Debug_appendonly_print_compaction, LOG,
			"Gather statistics after vacuum for append-only relation %s: "
			"page count %u, tuple count %f",
			relname,
			nblocks, num_tuples);

	*rel_pages = nblocks;
	*rel_tuples = num_tuples;
	*relhasindex = aorel->rd_rel->relhasindex;

	ereport(elevel,
			(errmsg("\"%s\": found %.0f rows in %u pages.",
					relname, num_tuples, nblocks)));

	pfree(fstotal);
}
/*
 * Populate the column encoding catalog entries of a relation for which only
 * a WITH(...) clause was supplied and no per-column encoding directives, as
 * is the case for CREATE TABLE WITH () AS SELECT.
 *
 * Nothing is done unless the relation is column-oriented append-only.
 */
void
AddDefaultRelationAttributeOptions(Relation rel, List *options)
{
	List	   *encoding;
	Datum		encodingOpts;
	Oid			relid;
	AttrNumber	col;

	/* only supported on AOCO at this stage */
	if (!RelationIsAoCols(rel))
		return;

	/* Derive the directive from WITH(...); fall back to the default clause */
	encoding = form_default_storage_directive(options);
	if (encoding == NULL)
		encoding = default_column_encoding_clause();

	encoding = transformStorageEncodingClause(encoding);
	encodingOpts = transformRelOptions(PointerGetDatum(NULL), encoding, true, false);

	/* Every attribute receives the same encoding options */
	relid = RelationGetRelid(rel);
	col = 1;
	while (col <= RelationGetNumberOfAttributes(rel))
	{
		add_attribute_encoding_entry(relid, col, encodingOpts);
		col++;
	}

	CommandCounterIncrement();
}
/*
 * Returns true if the relation has no tuples. Prepare phase of
 * compaction invokes this function on each QE.
 *
 * Examples of empty tables:
 * 1. parent of a partitioned table
 * 2. table that is created but no tuples have been inserted yet
 * 3. table from which all existing tuples are deleted and the table
 * is vacuumed. This is a special case in which pg_aoseg_<oid> has
 * non-zero number of rows but tupcount value is zero for all rows.
 *
 * The segment relation is scanned with SnapshotNow under AccessShareLock;
 * the scan stops at the first row whose tupcount attribute is non-zero.
 */
bool
AppendOnlyCompaction_IsRelationEmpty(Relation aorel)
{
	AppendOnlyEntry *aoEntry;
	Relation	pg_aoseg_rel;
	TupleDesc	pg_aoseg_dsc;
	HeapTuple	tuple;
	HeapScanDesc aoscan;
	int			Anum_tupcount;
	bool		empty = true;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);
	pg_aoseg_rel = heap_open(aoEntry->segrelid, AccessShareLock);

	/* Only segrelid was needed from the entry; release it instead of leaking */
	pfree(aoEntry);

	pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel);
	aoscan = heap_beginscan(pg_aoseg_rel, SnapshotNow, 0, NULL);

	/* Row- and column-oriented segment relations use different attnums */
	Anum_tupcount = RelationIsAoRows(aorel) ?
		Anum_pg_aoseg_tupcount : Anum_pg_aocs_tupcount;

	while ((tuple = heap_getnext(aoscan, ForwardScanDirection)) != NULL)
	{
		if (0 < fastgetattr(tuple, Anum_tupcount, pg_aoseg_dsc, NULL))
		{
			/* One non-empty segment file settles the answer; stop scanning */
			empty = false;
			break;
		}
	}

	heap_endscan(aoscan);
	heap_close(pg_aoseg_rel, AccessShareLock);

	return empty;
}
/*
 * Compute the size of one fork of a relation.
 *
 * For heap-storage relations every segment file (relationpath,
 * relationpath.1, relationpath.2, ...) is stat()ed and the sizes summed.
 * glob() would be the obvious tool, but it is extremely slow when the
 * database holds many relations, so the files are probed one by one.
 *
 * For append-only relations the size comes from the AO metadata, and only
 * for the main fork. Anything else (RELSTORAGE_VIRTUAL) reports zero.
 *
 * Note: we can safely apply this to temp tables of other sessions, so there
 * is no check here or at the call sites for that.
 */
static int64
calculate_relation_size(Relation rel, ForkNumber forknum)
{
	int64		total = 0;
	char	   *basepath;
	char		segpath[MAXPGPATH];
	unsigned int segno;

	basepath = relpathbackend(rel->rd_node, rel->rd_backend, forknum);

	if (!RelationIsHeap(rel))
	{
		/* AO tables don't have any extra forks. */
		if (forknum == MAIN_FORKNUM)
		{
			if (RelationIsAoRows(rel))
				total = GetAOTotalBytes(rel, GetActiveSnapshot());
			else if (RelationIsAoCols(rel))
				total = GetAOCSTotalBytes(rel, GetActiveSnapshot(), true);
		}

		/* RELSTORAGE_VIRTUAL has no space usage */
		return total;
	}

	/*
	 * Ordinary relation, including heap and index. Segment files are named
	 * relationpath or relationpath.%u and have no holes, so the walk ends at
	 * the first file that does not exist.
	 */
	segno = 0;
	for (;;)
	{
		struct stat st;

		CHECK_FOR_INTERRUPTS();

		if (segno == 0)
			snprintf(segpath, MAXPGPATH, "%s", basepath);
		else
			snprintf(segpath, MAXPGPATH, "%s.%u", basepath, segno);

		if (stat(segpath, &st) < 0)
		{
			if (errno == ENOENT)
				break;
			else
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file %s: %m", segpath)));
		}

		total += st.st_size;
		segno++;
	}

	return total;
}
/** * Given the oid of a relation, this method calculates reltuples, relpages. This only looks up * local information (on master or segments). It produces meaningful values for AO and * heap tables and returns [0.0,0.0] for all other relations. * Input: * relationoid * Output: * array of two values [reltuples,relpages] */ Datum gp_statistics_estimate_reltuples_relpages_oid(PG_FUNCTION_ARGS) { float4 relpages = 0.0; float4 reltuples = 0.0; Oid relOid = PG_GETARG_OID(0); Datum values[2]; ArrayType *result; Relation rel = try_relation_open(relOid, AccessShareLock, false); if (rel != NULL) { if (rel->rd_rel->relkind == RELKIND_RELATION) { if (RelationIsHeap(rel)) { gp_statistics_estimate_reltuples_relpages_heap(rel, &reltuples, &relpages); } else if (RelationIsAoRows(rel)) { gp_statistics_estimate_reltuples_relpages_ao_rows(rel, &reltuples, &relpages); } else if (RelationIsAoCols(rel)) { gp_statistics_estimate_reltuples_relpages_ao_cs(rel, &reltuples, &relpages); } } else if (rel->rd_rel->relkind == RELKIND_INDEX) { reltuples = 1.0; relpages = RelationGetNumberOfBlocks(rel); } else { /** * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1. */ } relation_close(rel, AccessShareLock); } else { /** * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1. */ } values[0] = Float4GetDatum(reltuples); values[1] = Float4GetDatum(relpages); result = construct_array(values, 2, FLOAT4OID, sizeof(float4), true, 'i'); PG_RETURN_ARRAYTYPE_P(result); }
/**
 * Estimates reltuples and relpages for a column-oriented append-only table
 * from its segment file metadata, read with SnapshotNow.
 *
 * relpages is guessed from the sum of eof_uncompressed over every column of
 * every segment file. reltuples is the sum of total_tupcount over segment
 * files that are not awaiting drop, reduced by the relation-wide hidden
 * tuple count reported by the visibility map.
 *
 * Input:
 * 	rel - relation of kind RELKIND_RELATION with AO column storage
 * Output:
 * 	reltuples, relpages - both are reset to 0.0 before being filled in
 */
static void
gp_statistics_estimate_reltuples_relpages_ao_cs(Relation rel, float4 *reltuples, float4 *relpages)
{
	AOCSFileSegInfo **aocsInfo = NULL;
	int			nsegs = 0;
	double		totalBytes = 0;
	AppendOnlyEntry *aoEntry;
	int64		hidden_tupcount;
	AppendOnlyVisimap visimap;

	/**
	 * Ensure that the right kind of relation with the right type of storage is passed to us.
	 */
	Assert(rel->rd_rel->relkind == RELKIND_RELATION);
	Assert(RelationIsAoCols(rel));

	*reltuples = 0.0;
	*relpages = 0.0;

	/* get table level statistics from the pg_aoseg table */
	aoEntry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
	aocsInfo = GetAllAOCSFileSegInfo(rel, aoEntry, SnapshotNow, &nsegs);
	if (aocsInfo)
	{
		int			i;
		int			j;

		for (i = 0; i < nsegs; i++)
		{
			for (j = 0; j < RelationGetNumberOfAttributes(rel); j++)
			{
				AOCSVPInfoEntry *e = getAOCSVPEntry(aocsInfo[i], j);

				Assert(e);
				totalBytes += e->eof_uncompressed;
			}

			/* Do not include tuples from an awaiting drop segment file */
			if (aocsInfo[i]->state != AOSEG_STATE_AWAITING_DROP)
			{
				*reltuples += aocsInfo[i]->total_tupcount;
			}
		}

		/**
		 * The planner doesn't understand AO's blocks, so need this method to try to fudge up a number for
		 * the planner.
		 */
		*relpages = RelationGuessNumberOfBlocks(totalBytes);

		/* The segment info array is no longer needed; do not leak it */
		FreeAllAOCSSegFileInfo(aocsInfo, nsegs);
		pfree(aocsInfo);
	}

	/* Hidden (deleted but not yet compacted) tuples are not counted */
	AppendOnlyVisimap_Init(&visimap,
						   aoEntry->visimaprelid,
						   aoEntry->visimapidxid,
						   AccessShareLock,
						   SnapshotNow);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	(*reltuples) -= hidden_tupcount;

	pfree(aoEntry);

	return;
}
/*
 * Compute the size of a relation.
 *
 * Heap-storage relations are sized by stat()ing each segment file in turn
 * and adding up the results. glob() would be the obvious tool, but it is
 * extremely slow when the database holds many relations, so the files are
 * probed one by one. Append-only relations take their size from the AO
 * metadata, read with SnapshotNow. Anything else reports zero.
 */
static int64
calculate_relation_size(Relation rel)
{
	int64		total = 0;
	char	   *basepath;
	char		segpath[MAXPGPATH];
	struct stat st;
	int			segno;

	basepath = relpath(rel->rd_node);

	if (!RelationIsHeap(rel))
	{
		if (RelationIsAoRows(rel))
			total = GetAOTotalBytes(rel, SnapshotNow);
		else if (RelationIsAoCols(rel))
			total = GetAOCSTotalBytes(rel, SnapshotNow);

		/* RELSTORAGE_VIRTUAL has no space usage */
		return total;
	}

	/*
	 * Ordinary relation, including heap and index. Segment files are named
	 * relationpath or relationpath.%d and have no holes, so the walk ends at
	 * the first file that does not exist.
	 */
	segno = 0;
	for (;;)
	{
		if (segno == 0)
			snprintf(segpath, MAXPGPATH, "%s", basepath);
		else
			snprintf(segpath, MAXPGPATH, "%s.%d", basepath, segno);

		if (stat(segpath, &st) < 0)
		{
			if (errno == ENOENT)
				break;
			else
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file %s: %m", segpath)));
		}
		else
			total += st.st_size;

		++segno;
	}

	return total;
}
/* * Compute the on-disk size of files for the relation according to the * stat function, including heap data, index data, toast data, aoseg data, * aoblkdir data, and aovisimap data. */ static int64 calculate_total_relation_size(Oid Relid) { Relation heapRel; Oid toastOid; AppendOnlyEntry *aoEntry = NULL; int64 size; ListCell *cell; heapRel = try_relation_open(Relid, AccessShareLock, false); if (!RelationIsValid(heapRel)) return 0; toastOid = heapRel->rd_rel->reltoastrelid; if (RelationIsAoRows(heapRel) || RelationIsAoCols(heapRel)) aoEntry = GetAppendOnlyEntry(Relid, SnapshotNow); /* Get the heap size */ if (Relid == 0 || heapRel->rd_node.relNode == 0) size = 0; else size = calculate_relation_size(heapRel); /* Include any dependent indexes */ if (heapRel->rd_rel->relhasindex) { List *index_oids = RelationGetIndexList(heapRel); foreach(cell, index_oids) { Oid idxOid = lfirst_oid(cell); Relation iRel; iRel = try_relation_open(idxOid, AccessShareLock, false); if (RelationIsValid(iRel)) { size += calculate_relation_size(iRel); relation_close(iRel, AccessShareLock); } }
/**
 * Drops a segment file of a column-oriented append-only relation.
 *
 * Each column has its own physical file, addressed by a pseudo segment
 * number derived from the column index and the logical segment number. For
 * every column the gp_relation_node entry is looked up, the file is
 * scheduled for drop and the entry is deleted. The function returns as soon
 * as a column has no gp_relation_node entry.
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel, int segno)
{
	ItemPointerData persistentTid;
	int64		persistentSerialNum;
	int			attidx = 0;

	Assert(RelationIsAoCols(aorel));

	while (attidx < RelationGetNumberOfAttributes(aorel))
	{
		int			physicalSegNo;

		physicalSegNo = (attidx * AOTupleId_MultiplierSegmentFileNum) + segno;

		if (!ReadGpRelationNode(aorel->rd_rel->reltablespace,
								aorel->rd_rel->relfilenode,
								physicalSegNo,
								&persistentTid,
								&persistentSerialNum))
		{
			/* There is nothing to drop */
			return;
		}

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: "
			   "segno %d",
			   physicalSegNo);

		MirroredFileSysObj_ScheduleDropAppendOnlyFile(&aorel->rd_node,
													  physicalSegNo,
													  RelationGetRelationName(aorel),
													  &persistentTid,
													  persistentSerialNum);

		DeleteGpRelationNodeTuple(aorel, physicalSegNo);

		attidx++;
	}
}
/*
 * Drops a segment file.
 *
 * The file of every column is only truncated to 0 bytes, which reclaims the
 * space. Before GPDB 6 the file used to be removed, but with WAL replication
 * there is no longer a convenient function to remove a single segment of a
 * relation. An empty file is almost as good as a non-existent one: if the
 * relation is dropped later, the code in mdunlink() removes all segments,
 * including any empty ones left behind here.
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel, int segno)
{
	int			attidx;

	Assert(RelationIsAoCols(aorel));

	for (attidx = 0; attidx < RelationGetNumberOfAttributes(aorel); attidx++)
	{
		char		segpath[MAXPGPATH];
		int			physicalSegNo;
		File		segfd;

		/* Open and truncate the relation segfile */
		MakeAOSegmentFileName(aorel, segno, attidx, &physicalSegNo, segpath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: "
			   "segno %d",
			   physicalSegNo);

		segfd = OpenAOSegmentFile(aorel, segpath, physicalSegNo, 0);
		if (segfd < 0)
		{
			/*
			 * The file we were about to drop/truncate didn't exist. That's
			 * normal, for example, if a column is added with ALTER TABLE ADD
			 * COLUMN.
			 */
			elog(DEBUG1, "could not truncate segfile %s, because it does not exist",
				 segpath);
			continue;
		}

		TruncateAOSegmentFile(segfd, aorel, physicalSegNo, 0);
		CloseAOSegmentFile(segfd);
	}
}
/*
 * Creates the "pg_aoblkdir" auxiliary table, together with its unique
 * index, for an append-only relation. Relations that are neither row- nor
 * column-oriented append-only are left alone.
 *
 * relOid        - oid of the target relation
 * is_part_child - when true the relation is opened with NoLock instead of
 *                 AccessExclusiveLock
 */
void
AlterTableCreateAoBlkdirTable(Oid relOid, bool is_part_child)
{
	Relation	rel;
	TupleDesc	tupdesc;
	IndexInfo  *indexInfo;
	Oid			classObjectId[3];
	int16		coloptions[3];
	int			i;

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction. (This is probably redundant in all present
	 * uses...)
	 */
	rel = heap_open(relOid, is_part_child ? NoLock : AccessExclusiveLock);

	if (!(RelationIsAoRows(rel) || RelationIsAoCols(rel)))
	{
		heap_close(rel, NoLock);
		return;
	}

	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(4, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno", INT4OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "columngroup_no", INT4OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "first_row_no", INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "minipage", VARBITOID, -1, 0);

	/*
	 * We don't want any toast columns here.
	 *
	 * TODO (dmeister): only the first three attributes are forced to plain
	 * storage. The fourth one (minipage, index 3) keeps the storage that
	 * TupleDescInitEntry assigned, therefore the minipage might be toasted.
	 */
	for (i = 0; i < 3; i++)
		tupdesc->attrs[i]->attstorage = 'p';

	/*
	 * Create a unique index on segno, columngroup_no, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 3;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_KeyAttrNumbers[2] = 3;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_Concurrent = false;

	/* Operator classes match the key column types: int4, int4, int8 */
	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT4_BTREE_OPS_OID;
	classObjectId[2] = INT8_BTREE_OPS_OID;

	for (i = 0; i < 3; i++)
		coloptions[i] = 0;

	(void) CreateAOAuxiliaryTable(rel,
								  "pg_aoblkdir",
								  RELKIND_AOBLOCKDIR,
								  tupdesc, indexInfo, classObjectId, coloptions);

	heap_close(rel, NoLock);
}
/*
 * Performs the drop phase for an append-only AOCS relation: every segment
 * file listed in compaction_segno that is in state AWAITING_DROP and whose
 * segment file lock can be taken without waiting is dropped and its segment
 * file info is reset to the default state.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.
 *
 * aorel            - column-oriented append-only relation
 * compaction_segno - integer list of segment numbers to consider
 */
void
AOCSDrop(Relation aorel,
		 List *compaction_segno)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Drop AOCS relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, &total_segfiles);

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;
		if (!list_member_int(compaction_segno, segno))
		{
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														   /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "drop skips AOCS segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but report it instead of dereferencing a NULL
		 * pointer below.
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		if (fsinfo->state == AOSEG_STATE_AWAITING_DROP)
		{
			Assert(HasLockForSegmentFileDrop(aorel));
			AOCSCompaction_DropSegmentFile(aorel, segno);
			ClearAOCSFileSegInfo(aorel, segno, AOSEG_STATE_DEFAULT);
		}
		pfree(fsinfo);
	}

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}
/*
 * Creates the "pg_aovisimap" auxiliary table, together with its unique
 * index on (segno, first_row_no), for an append-only relation. Relations
 * that are neither row- nor column-oriented append-only are left alone.
 *
 * relOid        - oid of the target relation
 * is_part_child - when true the relation is opened with NoLock instead of
 *                 AccessExclusiveLock
 */
void
AlterTableCreateAoVisimapTable(Oid relOid, bool is_part_child)
{
	Relation	rel;
	IndexInfo  *indexInfo;
	TupleDesc	tupdesc;
	Oid			classObjectId[2];
	int16		coloptions[2];

	/* Oid is an unsigned type, so it is printed with %u */
	elogif(Debug_appendonly_print_visimap, LOG,
		   "Create visimap for relation %u",
		   relOid);

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction. (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel))
	{
		heap_close(rel, NoLock);
		return;
	}

	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(Natts_pg_aovisimap, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno", INT4OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "first_row_no", INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "visimap", BYTEAOID, -1, 0);

	/*
	 * We don't want any toast columns here.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';

	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 2;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_Concurrent = false;

	/* Operator classes match the key column types: int4, int8 */
	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT8_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;

	(void) CreateAOAuxiliaryTable(rel,
								  "pg_aovisimap",
								  RELKIND_AOVISIMAP,
								  tupdesc, indexInfo, classObjectId, coloptions);

	heap_close(rel, NoLock);
}
/*
 * Set-returning worker that emits one row per segment file of an
 * append-only relation: (segno, hidden_tupcount, total_tupcount).
 *
 * The first call opens the relation, loads its segment file info with
 * SnapshotNow and initializes the visibility map; all of that is kept in
 * the multi-call memory context. Each later call returns the next segment
 * file, and the final call releases everything again. An error is raised
 * when the relation is not append-only.
 */
static Datum
gp_aovisimap_hidden_info_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[3];
	bool		nulls[3];
	HeapTuple	tuple;
	Datum		result;

	/* Cross-call state, stored in funcctx->user_fctx */
	typedef struct Context
	{
		AppendOnlyVisimap visiMap;
		Relation	parentRelation;
		FileSegInfo **appendonlySegfileInfo;
		AOCSFileSegInfo **aocsSegfileInfo;
		int			segfile_info_total;
		int			i;
	} Context;

	FuncCallContext *funcctx;
	Context    *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;
		Relation	parent;

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(3, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "hidden_tupcount",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "total_tupcount",
						   INT8OID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect the segment file information that will be formatted and
		 * sent out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		parent = heap_open(aoRelOid, AccessShareLock);
		context->parentRelation = parent;

		if (!RelationIsAoRows(parent) && !RelationIsAoCols(parent))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		if (RelationIsAoRows(parent))
		{
			context->appendonlySegfileInfo =
				GetAllFileSegInfo(parent,
								  SnapshotNow,
								  &context->segfile_info_total);
		}
		else
		{
			Assert(RelationIsAoCols(parent));
			context->aocsSegfileInfo =
				GetAllAOCSFileSegInfo(parent,
									  SnapshotNow,
									  &context->segfile_info_total);
		}
		context->i = 0;

		AppendOnlyVisimap_Init(&context->visiMap,
							   parent->rd_appendonly->visimaprelid,
							   parent->rd_appendonly->visimapidxid,
							   AccessShareLock,
							   SnapshotNow);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	/* Emit the next segment file, if any is left */
	if (context->i < context->segfile_info_total)
	{
		int64		tupcount;
		int			segno;

		if (context->appendonlySegfileInfo)
		{
			FileSegInfo *rowSeg = context->appendonlySegfileInfo[context->i];

			tupcount = rowSeg->total_tupcount;
			segno = rowSeg->segno;
		}
		else if (context->aocsSegfileInfo)
		{
			AOCSFileSegInfo *colSeg = context->aocsSegfileInfo[context->i];

			tupcount = colSeg->total_tupcount;
			segno = colSeg->segno;
		}
		else
		{
			Insist(false);
		}

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = Int32GetDatum(segno);
		values[1] = Int64GetDatum(AppendOnlyVisimap_GetSegmentFileHiddenTupleCount(
									  &context->visiMap, segno));
		values[2] = Int64GetDatum(tupcount);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		context->i++;
		SRF_RETURN_NEXT(funcctx, result);
	}

	/* All segment files reported: release the cross-call state */
	AppendOnlyVisimap_Finish(&context->visiMap, AccessShareLock);

	if (context->appendonlySegfileInfo)
	{
		FreeAllSegFileInfo(context->appendonlySegfileInfo,
						   context->segfile_info_total);
		pfree(context->appendonlySegfileInfo);
		context->appendonlySegfileInfo = NULL;
	}
	if (context->aocsSegfileInfo)
	{
		FreeAllAOCSSegFileInfo(context->aocsSegfileInfo,
							   context->segfile_info_total);
		pfree(context->aocsSegfileInfo);
		context->aocsSegfileInfo = NULL;
	}

	heap_close(context->parentRelation, AccessShareLock);
	pfree(context);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
/*
 * AOCSSegmentFileTruncateToEOF()
 *
 * Assumes that the segment file lock is already held.
 *
 * Truncates the physical file of every column of the given segment file to
 * the eof recorded for that column in the segment file info. Columns whose
 * file cannot be opened are skipped.
 */
static void
AOCSSegmentFileTruncateToEOF(Relation aorel, AOCSFileSegInfo *fsinfo)
{
	const char *relname;
	int			segno;
	int			col;

	Assert(fsinfo);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);
	segno = fsinfo->segno;

	for (col = 0; col < fsinfo->vpinfo.nEntry; ++col)
	{
		AOCSVPInfoEntry *vpentry = getAOCSVPEntry(fsinfo, col);
		int64		logicalEof = vpentry->eof;
		char		segpath[MAXPGPATH];
		int32		physicalSegNo;
		File		segfd;

		/* Open and truncate the relation segfile to its eof */
		MakeAOSegmentFileName(aorel, segno, col, &physicalSegNo, segpath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Opening AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   col,
			   segno,
			   physicalSegNo,
			   logicalEof);

		segfd = OpenAOSegmentFile(aorel, segpath, physicalSegNo, logicalEof);
		if (segfd < 0)
		{
			elogif(Debug_appendonly_print_compaction, LOG,
				   "No gp_relation_node entry for AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   aorel->rd_id,
				   aorel->rd_node.relNode,
				   col,
				   segno,
				   physicalSegNo,
				   logicalEof);
			continue;
		}

		TruncateAOSegmentFile(segfd, aorel, physicalSegNo, logicalEof);
		CloseAOSegmentFile(segfd);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Successfully truncated AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   col,
			   segno,
			   physicalSegNo,
			   logicalEof);
	}
}
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one relation
 *
 *		Append-only relations are handed over to lazy_vacuum_aorel(). For a
 *		heap, this routine vacuums it, cleans out its indexes, optionally
 *		truncates it, and updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;

	/*
	 * VERBOSE raises the message level to INFO, except on the QD, where
	 * vacuum and analyze messages aren't interesting.
	 */
	elevel = (vacstmt->verbose && Gp_role != GP_ROLE_DISPATCH) ? INFO : DEBUG2;

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
		FaultInjector_InjectFaultIfSet(
									   CompactionBeforeSegmentFileDropPhase,
									   DDLNotSpecified,
									   "",	/* databaseName */
									   ""); /* tableName */
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
		FaultInjector_InjectFaultIfSet(
									   CompactionBeforeCleanupPhase,
									   DDLNotSpecified,
									   "",	/* databaseName */
									   ""); /* tableName */
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to correct value
	 * for an appendonly (AO/CO) table.
	 */
	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return;
	}

	/* From here on we are dealing with a heap relation */
	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats,
				   vacstmt->extra_oids);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
	{
		lazy_truncate_heap(onerel, vacrelstats);
	}

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	/* Update statistics in pg_class */
	vac_update_relstats(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel),
						 onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze,
						 vacrelstats->rel_tuples);
}
/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed.
 *		DELETE can be part of an update operation when
 *		there is a preceding SplitUpdate node.
 *
 *		Parameters:
 *		  tupleid  - identifier of the tuple to delete; for append-only
 *					 tables it is reinterpreted as an AOTupleId
 *		  planSlot - slot used to look up the target partition when the
 *					 plan comes from the optimizer
 *		  dest     - not referenced in this function
 *		  estate   - executor state (result relation, snapshots, counters)
 *		  planGen  - which plan generator produced the plan; row triggers
 *					 are only fired for PLANGEN_PLANNER
 *		  isUpdate - true when the delete is one half of an update
 *
 *		Heap, append-only row and append-only column tables are supported;
 *		any other storage trips Insist(0).
 * ----------------------------------------------------------------
 */
void
ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	ResultRelInfo *resultRelInfo;
	Relation resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * Get information on the (current) result relation.
	 */
	if (estate->es_result_partitions && planGen == PLANGEN_OPTIMIZER)
	{
		Assert(estate->es_result_partitions->part->parrelid);

#ifdef USE_ASSERT_CHECKING
		Oid parent = estate->es_result_partitions->part->parrelid;
#endif

		/* Obtain part for current tuple. */
		resultRelInfo = slot_get_partition(planSlot, estate);
		estate->es_result_relation_info = resultRelInfo;

#ifdef USE_ASSERT_CHECKING
		Oid part = RelationGetRelid(resultRelInfo->ri_RelationDesc);
#endif

		/* The selected part must differ from the partitioned parent */
		Assert(parent != part);
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	Assert (!resultRelInfo->ri_projectReturning);

	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW DELETE Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
		{
			bool		dodelete;

			dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
											estate->es_snapshot->curcid);

			if (!dodelete)			/* "do nothing" */
				return;
		}
	}

	/* Classify the storage of the result relation once, up front */
	bool isHeapTable = RelationIsHeap(resultRelationDesc);
	bool isAORowsTable = RelationIsAoRows(resultRelationDesc);
	bool isAOColsTable = RelationIsAoCols(resultRelationDesc);
	bool isExternalTable = RelationIsExternal(resultRelationDesc);

	if (isExternalTable && estate->es_result_partitions &&
		estate->es_result_partitions->part->parrelid != 0)
	{
		ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			errmsg("Delete from external partitions not supported.")));
		return;
	}
	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 *
	 * The ldelete label is the re-entry point used after EvalPlanQual has
	 * supplied a newer version of the tuple to delete.
	 */
ldelete:;
	if (isHeapTable)
	{
		result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	}
	else if (isAORowsTable)
	{
		if (IsXactIsoLevelSerializable)
		{
			if (!isUpdate)
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Deletes on append-only tables are not supported in serializable transactions.")));
			else
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Updates on append-only tables are not supported in serializable transactions.")));
		}

		/* The delete descriptor is created on first use and then reused */
		if (resultRelInfo->ri_deleteDesc == NULL)
		{
			resultRelInfo->ri_deleteDesc =
				appendonly_delete_init(resultRelationDesc, ActiveSnapshot);
		}

		AOTupleId* aoTupleId = (AOTupleId*)tupleid;
		result = appendonly_delete(resultRelInfo->ri_deleteDesc, aoTupleId);
	}
	else if (isAOColsTable)
	{
		if (IsXactIsoLevelSerializable)
		{
			if (!isUpdate)
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Deletes on append-only tables are not supported in serializable transactions.")));
			else
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Updates on append-only tables are not supported in serializable transactions.")));
		}

		/* The delete descriptor is created on first use and then reused */
		if (resultRelInfo->ri_deleteDesc == NULL)
		{
			resultRelInfo->ri_deleteDesc =
				aocs_delete_init(resultRelationDesc);
		}

		AOTupleId* aoTupleId = (AOTupleId*)tupleid;
		result = aocs_delete(resultRelInfo->ri_deleteDesc, aoTupleId);
	}
	else
	{
		Insist(0);
	}
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */

			/*
			 * In a scenario in which R(a,b) and S(a,b) have
			 *        R               S
			 *    ________         ________
			 *     (1, 1)           (1, 2)
			 *                      (1, 7)
			 *
			 * an update query such as:
			 *   UPDATE R SET a = S.b  FROM S WHERE R.b = S.a;
			 *
			 * will have a non-deterministic output. The tuple in R
			 * can be updated to (2,1) or (7,1).
			 * Since the introduction of SplitUpdate, these queries will
			 * send multiple requests to delete the same tuple. Therefore,
			 * in order to avoid a non-deterministic output,
			 * an error is reported in such a scenario.
			 */
			if (isUpdate)
			{

				ereport(ERROR,
					(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION ),
					errmsg("multiple updates to a row by the same query is not allowed")));
			}

			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					/* Retry the delete against the newer tuple version */
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	/* Deletes that are part of an update are not counted here */
	if (!isUpdate)
	{
		IncrDeleted();
		(estate->es_processed)++;
		/*
		 * To notify master if tuples deleted or not, to update mod_count.
		 */
		(resultRelInfo->ri_aoprocessed)++;
	}

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */


	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW DELETE Triggers */
		ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
	}
}
/*
 * AOCSCompact
 *
 * Compacts the requested segment files of a column-oriented append-only
 * relation.
 *
 *	aorel            - the AOCS relation to work on
 *	compaction_segno - integer list of segment file numbers that are
 *					   candidates for compaction
 *	insert_segno     - segment file that receives the surviving tuples; it
 *					   is never compacted itself (asserted to be >= 0)
 *	isFull           - handed through to AppendOnlyCompaction_ShouldCompact
 *
 * In non-utility mode, all compaction segment files should be marked as
 * in-use/in-compaction in the appendonlywriter.c code, and the insert
 * segment file should be marked as in-use as well.
 *
 * The caller is required to hold either an AccessExclusiveLock (vacuum full)
 * or a ShareLock on the relation.
 */
void
AOCSCompact(Relation aorel,
			List *compaction_segno,
			int insert_segno,
			bool isFull)
{
	Snapshot	metaSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));
	AOCSInsertDesc insertDesc = NULL;
	AOCSFileSegInfo **allSegInfos;
	int			numSegInfos;
	const char *relname;
	int			idx;

	Assert(RelationIsAoCols(aorel));
	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(insert_segno >= 0);

	relname = RelationGetRelationName(aorel);
	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Load the catalog entries of every segment file of the relation. */
	allSegInfos = GetAllAOCSFileSegInfo(aorel, metaSnapshot, &numSegInfos);

	if (insert_segno >= 0)
		insertDesc = aocs_insert_init(aorel, insert_segno, false);

	for (idx = 0; idx < numSegInfos; idx++)
	{
		int			curSegno = allSegInfos[idx]->segno;
		LockAcquireResult lockResult;
		AOCSFileSegInfo *lockedInfo;

		/*
		 * Skip segment files that were not requested, and the segment file
		 * we are inserting into: it cannot be compacted at the same time.
		 */
		if (!list_member_int(compaction_segno, curSegno) ||
			curSegno == insert_segno)
			continue;

		/*
		 * Try to take the transaction write-lock on the segment file without
		 * waiting.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		lockResult = LockRelationAppendOnlySegmentFile(&aorel->rd_node,
													   allSegInfos[idx]->segno,
													   AccessExclusiveLock,
													   /* dontWait */ true);
		if (lockResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "compaction skips AOCS segfile %d, "
				 "relation %s",
				 allSegInfos[idx]->segno, relname);
			continue;
		}

		/* Read the entry again under the write lock for the committed eof. */
		lockedInfo = GetAOCSFileSegInfo(aorel, metaSnapshot, curSegno);

		/*
		 * The entry was just returned by the "all" lookup, so a miss here is
		 * unexpected; report it loudly to help troubleshooting (possibly
		 * index corruption?).
		 */
		if (lockedInfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 curSegno);

		if (AppendOnlyCompaction_ShouldCompact(aorel,
											   lockedInfo->segno,
											   lockedInfo->total_tupcount,
											   isFull,
											   metaSnapshot))
		{
			AOCSSegmentFileFullCompaction(aorel, insertDesc, lockedInfo,
										  metaSnapshot);
		}

		pfree(lockedInfo);
	}

	if (insertDesc != NULL)
		aocs_insert_finish(insertDesc);

	if (allSegInfos)
	{
		FreeAllAOCSSegFileInfo(allSegInfos, numSegInfos);
		pfree(allSegInfos);
	}

	UnregisterSnapshot(metaSnapshot);
}
/*
 * AOCSSegmentFileFullCompaction
 *
 * Copies every visible tuple of one AOCS segment file into the insert
 * segment file, then marks the source segment file as awaiting drop and
 * removes its visimap (and, if present, block directory) entries.
 *
 *	aorel      - the column-oriented append-only relation
 *	insertDesc - open insert descriptor receiving the visible tuples
 *				 (must not be NULL)
 *	fsinfo     - catalog information of the segment file to compact
 *	snapshot   - snapshot used for the visimap, the scan and the block
 *				 directory cleanup
 *
 * Assumes that the segment file lock is already held.
 * Assumes that the segment file should be compacted.
 *
 * Always returns true.
 */
static bool
AOCSSegmentFileFullCompaction(Relation aorel,
							  AOCSInsertDesc insertDesc,
							  AOCSFileSegInfo *fsinfo,
							  Snapshot snapshot)
{
	const char *relname;
	AppendOnlyVisimap visiMap;
	AOCSScanDesc scanDesc;
	TupleDesc	tupDesc;
	TupleTableSlot *slot;
	int			compact_segno;
	int64		movedTupleCount = 0;
	ResultRelInfo *resultRelInfo;
	MemTupleBinding *mt_bind;
	EState	   *estate;
	bool	   *proj;
	int			i;
	AOTupleId  *aoTupleId;
	int64		tupleCount = 0;
	int64		tuplePerPage = INT_MAX;

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));
	Assert(insertDesc);

	compact_segno = fsinfo->segno;
	if (fsinfo->varblockcount > 0)
	{
		tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount;

		/*
		 * The integer division truncates to zero when the segment file has
		 * fewer tuples than var blocks.  tuplePerPage is used as a modulo
		 * divisor below, so never let it drop below one.
		 */
		if (tuplePerPage <= 0)
			tuplePerPage = 1;
	}
	relname = RelationGetRelationName(aorel);

	AppendOnlyVisimap_Init(&visiMap,
						   aorel->rd_appendonly->visimaprelid,
						   aorel->rd_appendonly->visimapidxid,
						   ShareLock,
						   snapshot);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO segfile %d, relation %s",
		   compact_segno, relname);

	/* Project every column: the whole tuple has to be moved. */
	proj = palloc0(sizeof(bool) * RelationGetNumberOfAttributes(aorel));
	for (i = 0; i < RelationGetNumberOfAttributes(aorel); ++i)
	{
		proj[i] = true;
	}

	/* Scan only the segment file that is being compacted. */
	scanDesc = aocs_beginrangescan(aorel,
								   snapshot, snapshot,
								   &compact_segno, 1, NULL, proj);

	tupDesc = RelationGetDescr(aorel);
	slot = MakeSingleTupleTableSlot(tupDesc);
	mt_bind = create_memtuple_binding(tupDesc);

	/*
	 * We need a ResultRelInfo and an EState so we can use the regular
	 * executor's index-entry-making machinery.
	 */
	estate = CreateExecutorState();
	resultRelInfo = makeNode(ResultRelInfo);
	resultRelInfo->ri_RangeTableIndex = 1;	/* dummy */
	resultRelInfo->ri_RelationDesc = aorel;
	resultRelInfo->ri_TrigDesc = NULL;	/* we don't fire triggers */
	ExecOpenIndices(resultRelInfo);
	estate->es_result_relations = resultRelInfo;
	estate->es_num_result_relations = 1;
	estate->es_result_relation_info = resultRelInfo;

	while (aocs_getnext(scanDesc, ForwardScanDirection, slot))
	{
		CHECK_FOR_INTERRUPTS();

		aoTupleId = (AOTupleId *) slot_get_ctid(slot);
		if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId))
		{
			AOCSMoveTuple(slot,
						  insertDesc,
						  resultRelInfo,
						  estate);
			movedTupleCount++;
		}
		else
		{
			/* Tuple is invisible and needs to be dropped */
			AppendOnlyThrowAwayTuple(aorel, slot, mt_bind);
		}

		/*
		 * Check for vacuum delay point after approximately a var block
		 */
		tupleCount++;
		if (VacuumCostActive && tupleCount % tuplePerPage == 0)
		{
			vacuum_delay_point();
		}
	}

	SetAOCSFileSegInfoState(aorel, compact_segno,
							AOSEG_STATE_AWAITING_DROP);

	AppendOnlyVisimap_DeleteSegmentFile(&visiMap, compact_segno);

	/* Delete all mini pages of the segment files if block directory exists */
	if (OidIsValid(aorel->rd_appendonly->blkdirrelid))
	{
		AppendOnlyBlockDirectory_DeleteSegmentFile(aorel,
												   snapshot,
												   compact_segno,
												   0);
	}

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Finished compaction: "
		   "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT,
		   compact_segno, relname, movedTupleCount);

	/* NoLock: the visimap lock taken above is not released here. */
	AppendOnlyVisimap_Finish(&visiMap, NoLock);

	ExecCloseIndices(resultRelInfo);
	FreeExecutorState(estate);

	ExecDropSingleTupleTableSlot(slot);
	destroy_memtuple_binding(mt_bind);

	aocs_endscan(scanDesc);
	pfree(proj);

	return true;
}
/*
 * vacuum_appendonly_rel() -- vacuum an append-only relation
 *
 * Runs one phase of vacuum on a row- or column-oriented append-only
 * relation.  Which phase is run depends on
 * vacstmt->appendonly_compaction_insert_segno:
 *
 *	- empty list: drop phase, handled by AppendOnlyDrop / AOCSDrop;
 *	- APPENDONLY_COMPACTION_SEGNO_INVALID: pseudo-compaction phase, which
 *	  only emits a debug message here;
 *	- any other segment number: compaction phase, handled by
 *	  AppendOnlyCompact / AOCSCompact with that segment file as insert
 *	  target.
 *
 * On the dispatcher (GP_ROLE_DISPATCH) only the "vacuuming" message is
 * reported and nothing else is done.
 *
 * This function does not itself write reltuples/relpages.
 */
void
vacuum_appendonly_rel(Relation aorel, VacuumStmt *vacstmt)
{
	PGRUsage	ru0;
	char	   *relname;
	bool		isRowOriented;
	int			insert_segno;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));
	Assert(!vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt));

	pg_rusage_init(&ru0);
	relname = RelationGetRelationName(aorel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(aorel)),
					relname)));

	/* Nothing more to do on the dispatcher. */
	if (Gp_role == GP_ROLE_DISPATCH)
		return;

	Assert(list_length(vacstmt->appendonly_compaction_insert_segno) <= 1);

	isRowOriented = RelationIsAoRows(aorel);

	/* Drop phase: no insert segment file was handed to us. */
	if (vacstmt->appendonly_compaction_insert_segno == NULL)
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum drop phase %s", relname);

		if (isRowOriented)
			AppendOnlyDrop(aorel, vacstmt->appendonly_compaction_segno);
		else
		{
			Assert(RelationIsAoCols(aorel));
			AOCSDrop(aorel, vacstmt->appendonly_compaction_segno);
		}
		return;
	}

	insert_segno = linitial_int(vacstmt->appendonly_compaction_insert_segno);

	/* Pseudo-compaction phase: only traced, no work is done here. */
	if (insert_segno == APPENDONLY_COMPACTION_SEGNO_INVALID)
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum pseudo-compaction phase %s", relname);
		return;
	}

	/* Compaction phase. */
	elogif(Debug_appendonly_print_compaction, LOG,
		   "Vacuum compaction phase %s", relname);

	if (isRowOriented)
		AppendOnlyCompact(aorel,
						  vacstmt->appendonly_compaction_segno,
						  insert_segno,
						  vacstmt->full);
	else
	{
		Assert(RelationIsAoCols(aorel));
		AOCSCompact(aorel,
					vacstmt->appendonly_compaction_segno,
					insert_segno,
					vacstmt->full);
	}
}
/* ---------------------------------------------------------------- * ExecUpdate * * note: we can't run UPDATE queries with transactions * off because UPDATEs are actually INSERTs and our * scan will mistakenly loop forever, updating the tuple * it just inserted.. This should be fixed but until it * is, we don't want to get stuck in an infinite loop * which corrupts your database.. * ---------------------------------------------------------------- */ void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid, TupleTableSlot *planSlot, DestReceiver *dest, EState *estate) { void* tuple; ResultRelInfo *resultRelInfo; Relation resultRelationDesc; HTSU_Result result; ItemPointerData update_ctid; TransactionId update_xmax; AOTupleId aoTupleId = AOTUPLEID_INIT; TupleTableSlot *partslot = NULL; /* * abort the operation if not running transactions */ if (IsBootstrapProcessingMode()) elog(ERROR, "cannot UPDATE during bootstrap"); /* * get information on the (current) result relation */ resultRelInfo = estate->es_result_relation_info; resultRelationDesc = resultRelInfo->ri_RelationDesc; bool rel_is_heap = RelationIsHeap(resultRelationDesc); bool rel_is_aorows = RelationIsAoRows(resultRelationDesc); bool rel_is_aocols = RelationIsAoCols(resultRelationDesc); bool rel_is_external = RelationIsExternal(resultRelationDesc); /* * get the heap tuple out of the tuple table slot, making sure we have a * writable copy */ if (rel_is_heap) { partslot = slot; tuple = ExecFetchSlotHeapTuple(partslot); } else if (rel_is_aorows || rel_is_aocols) { /* * It is necessary to reconstruct a logically compatible tuple to * a phyiscally compatible tuple. The slot's tuple descriptor comes * from the projection target list, which doesn't indicate dropped * columns, and MemTuple cannot deal with cases without converting * the target list back into the original relation's tuple desc. 
*/ partslot = reconstructMatchingTupleSlot(slot, resultRelInfo); /* * We directly inline toasted columns here as update with toasted columns * would create two references to the same toasted value. */ tuple = ExecFetchSlotMemTuple(partslot, true); } else if (rel_is_external) { if (estate->es_result_partitions && estate->es_result_partitions->part->parrelid != 0) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Update external partitions not supported."))); return; } else { partslot = slot; tuple = ExecFetchSlotHeapTuple(partslot); } } else { Insist(false); } /* see if this update would move the tuple to a different partition */ if (estate->es_result_partitions) checkPartitionUpdate(estate, partslot, resultRelInfo); /* BEFORE ROW UPDATE Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0) { HeapTuple newtuple; newtuple = ExecBRUpdateTriggers(estate, resultRelInfo, tupleid, tuple, estate->es_snapshot->curcid); if (newtuple == NULL) /* "do nothing" */ return; if (newtuple != tuple) /* modified by Trigger(s) */ { /* * Put the modified tuple into a slot for convenience of routines * below. We assume the tuple was allocated in per-tuple memory * context, and therefore will go away by itself. The tuple table * slot should not try to clear it. */ TupleTableSlot *newslot = estate->es_trig_tuple_slot; if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor) ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor); ExecStoreGenericTuple(newtuple, newslot, false); newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */ partslot = newslot; tuple = newtuple; } } /* * Check the constraints of the tuple * * If we generate a new candidate tuple after EvalPlanQual testing, we * must loop back here and recheck constraints. (We don't need to redo * triggers, however. 
If there are any BEFORE triggers then trigger.c * will have done heap_lock_tuple to lock the correct tuple, so there's no * need to do them again.) */ lreplace:; if (resultRelationDesc->rd_att->constr) ExecConstraints(resultRelInfo, partslot, estate); if (!GpPersistent_IsPersistentRelation(resultRelationDesc->rd_id)) { /* * Normal UPDATE path. */ /* * replace the heap tuple * * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that * the row to be updated is visible to that snapshot, and throw a can't- * serialize error if not. This is a special-case behavior needed for * referential integrity updates in serializable transactions. */ if (rel_is_heap) { result = heap_update(resultRelationDesc, tupleid, tuple, &update_ctid, &update_xmax, estate->es_snapshot->curcid, estate->es_crosscheck_snapshot, true /* wait for commit */ ); } else if (rel_is_aorows) { if (IsXactIsoLevelSerializable) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Updates on append-only tables are not supported in serializable transactions."))); } if (resultRelInfo->ri_updateDesc == NULL) { ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos); resultRelInfo->ri_updateDesc = (AppendOnlyUpdateDesc) appendonly_update_init(resultRelationDesc, ActiveSnapshot, resultRelInfo->ri_aosegno); } result = appendonly_update(resultRelInfo->ri_updateDesc, tuple, (AOTupleId *) tupleid, &aoTupleId); } else if (rel_is_aocols) { if (IsXactIsoLevelSerializable) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Updates on append-only tables are not supported in serializable transactions."))); } if (resultRelInfo->ri_updateDesc == NULL) { ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos); resultRelInfo->ri_updateDesc = (AppendOnlyUpdateDesc) aocs_update_init(resultRelationDesc, resultRelInfo->ri_aosegno); } result = aocs_update(resultRelInfo->ri_updateDesc, partslot, (AOTupleId *) tupleid, &aoTupleId); } else { Assert(!"We should not be here"); } 
switch (result) { case HeapTupleSelfUpdated: /* already deleted by self; nothing to do */ return; case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); else if (!ItemPointerEquals(tupleid, &update_ctid)) { TupleTableSlot *epqslot; epqslot = EvalPlanQual(estate, resultRelInfo->ri_RangeTableIndex, &update_ctid, update_xmax, estate->es_snapshot->curcid); if (!TupIsNull(epqslot)) { *tupleid = update_ctid; partslot = ExecFilterJunk(estate->es_junkFilter, epqslot); tuple = ExecFetchSlotHeapTuple(partslot); goto lreplace; } } /* tuple already deleted; nothing to do */ return; default: elog(ERROR, "unrecognized heap_update status: %u", result); return; } } else { HeapTuple persistentTuple; /* * Persistent metadata path. */ persistentTuple = heap_copytuple(tuple); persistentTuple->t_self = *tupleid; frozen_heap_inplace_update(resultRelationDesc, persistentTuple); heap_freetuple(persistentTuple); } IncrReplaced(); (estate->es_processed)++; (resultRelInfo->ri_aoprocessed)++; /* * Note: instead of having to update the old index tuples associated with * the heap tuple, all we do is form and insert new index tuples. This is * because UPDATEs are actually DELETEs and INSERTs, and index tuple * deletion is done later by VACUUM (see notes in ExecDelete). All we do * here is insert new index tuples. -cim 9/27/89 */ /* * insert index entries for tuple * * Note: heap_update returns the tid (location) of the new tuple in the * t_self field. */ if (rel_is_aorows || rel_is_aocols) { if (resultRelInfo->ri_NumIndices > 0) ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false); } else { if (resultRelInfo->ri_NumIndices > 0) ExecInsertIndexTuples(partslot, &(((HeapTuple) tuple)->t_self), estate, false); } /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple); }
/* * Create append-only auxiliary relations for target relation rel. * Returns true if they are newly created. If pg_appendonly has already * known those tables, don't create them and returns false. */ bool CreateAOAuxiliaryTable( Relation rel, const char *auxiliaryNamePrefix, char relkind, TupleDesc tupledesc, IndexInfo *indexInfo, Oid *classObjectId, int16 *coloptions) { char aoauxiliary_relname[NAMEDATALEN]; char aoauxiliary_idxname[NAMEDATALEN]; bool shared_relation; Oid relOid, aoauxiliary_relid = InvalidOid; Oid aoauxiliary_idxid = InvalidOid; ObjectAddress baseobject; ObjectAddress aoauxiliaryobject; Assert(RelationIsValid(rel)); Assert(RelationIsAoRows(rel) || RelationIsAoCols(rel)); Assert(auxiliaryNamePrefix); Assert(tupledesc); Assert(classObjectId); if (relkind != RELKIND_AOSEGMENTS) Assert(indexInfo); shared_relation = rel->rd_rel->relisshared; /* * We cannot allow creating an auxiliary table for a shared relation * after initdb (because there's no way to let other databases know * this visibility map. */ if (shared_relation && !IsBootstrapProcessingMode()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("shared tables cannot have append-only auxiliary relations after initdb"))); relOid = RelationGetRelid(rel); switch(relkind) { case RELKIND_AOVISIMAP: GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL, NULL, NULL, &aoauxiliary_relid, &aoauxiliary_idxid); break; case RELKIND_AOBLOCKDIR: GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL, &aoauxiliary_relid, &aoauxiliary_idxid, NULL, NULL); break; case RELKIND_AOSEGMENTS: GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, &aoauxiliary_relid, NULL, NULL, NULL, NULL); break; default: elog(ERROR, "unsupported auxiliary relkind '%c'", relkind); } /* * Does it have the auxiliary relation? 
*/ if (OidIsValid(aoauxiliary_relid)) { return false; } snprintf(aoauxiliary_relname, sizeof(aoauxiliary_relname), "%s_%u", auxiliaryNamePrefix, relOid); snprintf(aoauxiliary_idxname, sizeof(aoauxiliary_idxname), "%s_%u_index", auxiliaryNamePrefix, relOid); /* * We place auxiliary relation in the pg_aoseg namespace * even if its master relation is a temp table. There cannot be * any naming collision, and the auxiliary relation will be * destroyed when its master is, so there is no need to handle * the aovisimap relation as temp. */ aoauxiliary_relid = heap_create_with_catalog(aoauxiliary_relname, PG_AOSEGMENT_NAMESPACE, rel->rd_rel->reltablespace, InvalidOid, rel->rd_rel->relowner, tupledesc, /* relam */ InvalidOid, relkind, RELSTORAGE_HEAP, shared_relation, true, /* bufferPoolBulkLoad */ false, 0, ONCOMMIT_NOOP, NULL, /* GP Policy */ (Datum) 0, true, /* valid_opts */ false, /* persistentTid */ NULL, /* persistentSerialNum */ NULL); /* Make this table visible, else index creation will fail */ CommandCounterIncrement(); /* Create an index on AO auxiliary tables (like visimap) except for pg_aoseg table */ if (relkind != RELKIND_AOSEGMENTS) { aoauxiliary_idxid = index_create(aoauxiliary_relid, aoauxiliary_idxname, InvalidOid, indexInfo, BTREE_AM_OID, rel->rd_rel->reltablespace, classObjectId, coloptions, (Datum) 0, true, false, true, false, false, NULL); /* Unlock target table -- no one can see it */ UnlockRelationOid(aoauxiliary_relid, ShareLock); /* Unlock the index -- no one can see it anyway */ UnlockRelationOid(aoauxiliary_idxid, AccessExclusiveLock); } /* * Store the auxiliary table's OID in the parent relation's pg_appendonly row. * TODO (How to generalize this?) 
*/ switch (relkind) { case RELKIND_AOVISIMAP: UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, InvalidOid, InvalidOid, aoauxiliary_relid, aoauxiliary_idxid); break; case RELKIND_AOBLOCKDIR: UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, aoauxiliary_relid, aoauxiliary_idxid, InvalidOid, InvalidOid); break; case RELKIND_AOSEGMENTS: UpdateAppendOnlyEntryAuxOids(relOid, aoauxiliary_relid, InvalidOid, InvalidOid, InvalidOid, InvalidOid); break; default: elog(ERROR, "unsupported auxiliary relkind '%c'", relkind); } /* * Register dependency from the auxiliary table to the master, so that the * aoseg table will be deleted if the master is. */ baseobject.classId = RelationRelationId; baseobject.objectId = relOid; baseobject.objectSubId = 0; aoauxiliaryobject.classId = RelationRelationId; aoauxiliaryobject.objectId = aoauxiliary_relid; aoauxiliaryobject.objectSubId = 0; recordDependencyOn(&aoauxiliaryobject, &baseobject, DEPENDENCY_INTERNAL); /* * Make changes visible */ CommandCounterIncrement(); return true; }
/*
 * gp_aovisimap_internal
 *
 * Set-returning worker that emits one row (tid, segno, row_num) for every
 * tuple the visibility map scan of append-only relation aoRelOid reports as
 * invisible.
 *
 * The relation and the visimap scan are opened on the first call and kept
 * in the multi-call memory context; they are closed again once the scan is
 * exhausted.  Raises an error if aoRelOid is not an append-only relation.
 */
static Datum
gp_aovisimap_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	typedef struct Context
	{
		Relation	aorel;
		AppendOnlyVisimapScan visiMapScan;
		AOTupleId	aoTupleId;
	} Context;

	FuncCallContext *funcctx;
	Context    *state;

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext savedContext;
		TupleDesc	resultDesc;

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/* everything allocated below has to survive across calls */
		savedContext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* describe the result rows */
		resultDesc = CreateTemplateTupleDesc(3, false);
		TupleDescInitEntry(resultDesc, (AttrNumber) 1, "tid",
						   TIDOID, -1, 0);
		TupleDescInitEntry(resultDesc, (AttrNumber) 2, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(resultDesc, (AttrNumber) 3, "row_num",
						   INT8OID, -1, 0);
		funcctx->tuple_desc = BlessTupleDesc(resultDesc);

		/* open the relation and start the visimap scan */
		state = (Context *) palloc0(sizeof(Context));
		state->aorel = heap_open(aoRelOid, AccessShareLock);

		if (!RelationIsAoRows(state->aorel) && !RelationIsAoCols(state->aorel))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		AppendOnlyVisimapScan_Init(&state->visiMapScan,
								   state->aorel->rd_appendonly->visimaprelid,
								   state->aorel->rd_appendonly->visimapidxid,
								   AccessShareLock,
								   SnapshotNow);
		AOTupleIdInit_Init(&state->aoTupleId);

		funcctx->user_fctx = (void *) state;

		MemoryContextSwitchTo(savedContext);
	}

	funcctx = SRF_PERCALL_SETUP();
	state = (Context *) funcctx->user_fctx;

	/* Emit the next invisible tuple, if the scan still has one. */
	if (AppendOnlyVisimapScan_GetNextInvisible(&state->visiMapScan,
											   &state->aoTupleId))
	{
		Datum		values[3];
		bool		nulls[3];
		HeapTuple	tuple;
		Datum		result;

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));

		values[0] = ItemPointerGetDatum((ItemPointer)&state->aoTupleId);
		values[1] = Int32GetDatum(AOTupleIdGet_segmentFileNum(&state->aoTupleId));
		values[2] = Int64GetDatum(AOTupleIdGet_rowNum(&state->aoTupleId));

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}

	/* Scan exhausted: release everything acquired on the first call. */
	AppendOnlyVisimapScan_Finish(&state->visiMapScan, AccessShareLock);
	heap_close(state->aorel, AccessShareLock);
	pfree(state);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
/* * Returns true iff the given segment file should be compacted. */ bool AppendOnlyCompaction_ShouldCompact( Relation aoRelation, AppendOnlyEntry *aoEntry, int segno, int64 segmentTotalTupcount, bool isFull) { bool result; AppendOnlyVisimap visiMap; int64 hiddenTupcount; int hideRatio; Assert(RelationIsAoRows(aoRelation) || RelationIsAoCols(aoRelation)); if (!gp_appendonly_compaction) { ereport(LOG, (errmsg("Append-only compaction skipped on relation %s, segment file num %d", RelationGetRelationName(aoRelation), segno), errdetail("Compaction is disabled"))); /* Disable compaction by global guc. */ return false; } AppendOnlyVisimap_Init(&visiMap, aoEntry->visimaprelid, aoEntry->visimapidxid, ShareLock, SnapshotNow); hiddenTupcount = AppendOnlyVisimap_GetSegmentFileHiddenTupleCount( &visiMap, segno); result = true; if (isFull && hiddenTupcount > 0) { /* * if it is a full vacuum and there is any obsolete data, do a compaction */ result = true; } else { hideRatio = AppendOnlyCompaction_GetHideRatio(hiddenTupcount, segmentTotalTupcount); if (hideRatio <= gp_appendonly_compaction_threshold || gp_appendonly_compaction_threshold == 0) { if (hiddenTupcount > 0) { ereportif(Debug_appendonly_print_compaction, LOG, (errmsg("Append-only compaction skipped on relation %s, segment file num %d, " "hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " "hide ratio %d%%, threshold %d%%", RelationGetRelationName(aoRelation), segno, hiddenTupcount, segmentTotalTupcount, hideRatio, gp_appendonly_compaction_threshold))); ereport(LOG, (errmsg("Append-only compaction skipped on relation %s, segment file num %d", RelationGetRelationName(aoRelation), segno), errdetail("Ratio of obsolete tuples below threshold (%d%% vs %d%%)", hideRatio, gp_appendonly_compaction_threshold))); } else { ereportif(Debug_appendonly_print_compaction, LOG, (errmsg("Append-only compaction skipped on relation %s, segment file num %d, " "hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT 
", " "hide ratio %d%%, threshold %d%%", RelationGetRelationName(aoRelation), segno, hiddenTupcount, segmentTotalTupcount, hideRatio, gp_appendonly_compaction_threshold))); } result = false; } elogif(Debug_appendonly_print_compaction, LOG, "Schedule compaction: " "segno %d, " "hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " "hide ratio %d%%, threshold %d%%", segno, hiddenTupcount, segmentTotalTupcount, hideRatio, gp_appendonly_compaction_threshold); } AppendOnlyVisimap_Finish(&visiMap, ShareLock); return result; }
/*
 * gp_aovisimap_entry_internal
 *
 * Set-returning worker that emits one row per visibility map entry of the
 * append-only relation identified by aoRelOid.  Result columns:
 *
 *		segno			int4	segment file number of the entry
 *		first_row_num	int8	first row number of the entry
 *		hidden_tupcount	int4	hidden tuple count of the entry
 *		bitmap			text	encoded bitmap of the entry
 *
 * On the first call the relation is opened with AccessShareLock, an ERROR
 * (ERRCODE_FEATURE_NOT_SUPPORTED) is raised if it is neither AO row nor AO
 * column oriented, and a scan over the visimap store is started.  All
 * cross-call state lives in the SRF multi-call memory context.  Once the
 * scan is exhausted the scan, the visimap and the relation are released and
 * the SRF is finished.
 */
static Datum
gp_aovisimap_entry_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[4];
	bool		nulls[4];
	HeapTuple	tuple;
	Datum		result;

	/* State carried between calls of the set-returning function. */
	typedef struct Context
	{
		AppendOnlyVisimap visiMap;
		Relation	parentRelation;
		IndexScanDesc indexScan;
		text	   *bitmapBuffer;	/* reused for every returned row */
	} Context;

	FuncCallContext *funcctx;
	Context    *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(4, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "first_row_num",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "hidden_tupcount",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "bitmap",
						   TEXTOID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Open the relation and its visibility map, and start the scan
		 * whose entries are returned one per call.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->parentRelation = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->parentRelation) ||
			  RelationIsAoCols(context->parentRelation)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		AppendOnlyVisimap_Init(&context->visiMap,
							   context->parentRelation->rd_appendonly->visimaprelid,
							   context->parentRelation->rd_appendonly->visimapidxid,
							   AccessShareLock,
							   SnapshotNow);

		context->indexScan =
			AppendOnlyVisimapStore_BeginScan(&context->visiMap.visimapStore,
											 0, NULL);

		/* varlena header + one character per row in the range + NUL */
		context->bitmapBuffer =
			palloc0(VARHDRSZ + APPENDONLY_VISIMAP_MAX_RANGE + 1);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	if (AppendOnlyVisimapStore_GetNext(&context->visiMap.visimapStore,
									   context->indexScan,
									   ForwardScanDirection,
									   &context->visiMap.visimapEntry,
									   NULL))
	{
		AppendOnlyVisimapEntry *visimapEntry = &context->visiMap.visimapEntry;

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));

		values[0] = Int32GetDatum(visimapEntry->segmentFileNum);
		values[1] = Int64GetDatum(visimapEntry->firstRowNum);
		values[2] = Int32GetDatum(
			(int32) AppendOnlyVisimapEntry_GetHiddenTupleCount(visimapEntry));

		gp_aovisimap_encode_bitmap(VARDATA(context->bitmapBuffer),
								   visimapEntry->bitmap);

		/*
		 * The varlena length covers the header as well as the payload.
		 * Passing only APPENDONLY_VISIMAP_MAX_RANGE here cut the last
		 * VARHDRSZ characters off the returned text.
		 *
		 * NOTE(review): assumes the encoder emits
		 * APPENDONLY_VISIMAP_MAX_RANGE characters, as the buffer sizing
		 * above implies -- confirm against gp_aovisimap_encode_bitmap.
		 */
		SET_VARSIZE(context->bitmapBuffer,
					VARHDRSZ + APPENDONLY_VISIMAP_MAX_RANGE);
		values[3] = PointerGetDatum(context->bitmapBuffer);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}

	/* Scan exhausted: release everything acquired on the first call. */
	AppendOnlyVisimapStore_EndScan(&context->visiMap.visimapStore,
								   context->indexScan);
	AppendOnlyVisimap_Finish(&context->visiMap, AccessShareLock);
	heap_close(context->parentRelation, AccessShareLock);
	pfree(context->bitmapBuffer);
	pfree(context);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
/* * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation * * This routine vacuums a single heap, cleans out its indexes, and * updates its relpages and reltuples statistics. * * At entry, we have already established a transaction and opened * and locked the relation. * * The return value indicates whether this function has held off * interrupts -- caller must RESUME_INTERRUPTS() after commit if true. */ bool lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy, List *updated_stats) { LVRelStats *vacrelstats; Relation *Irel; int nindexes; BlockNumber possibly_freeable; PGRUsage ru0; TimestampTz starttime = 0; bool heldoff = false; pg_rusage_init(&ru0); /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0) starttime = GetCurrentTimestamp(); if (vacstmt->verbose) elevel = INFO; else elevel = DEBUG2; if (Gp_role == GP_ROLE_DISPATCH) elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */ #ifdef FAULT_INJECTOR if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt)) { FaultInjector_InjectFaultIfSet( CompactionBeforeSegmentFileDropPhase, DDLNotSpecified, "", // databaseName ""); // tableName } if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt)) { FaultInjector_InjectFaultIfSet( CompactionBeforeCleanupPhase, DDLNotSpecified, "", // databaseName ""); // tableName } #endif /* * MPP-23647. Update xid limits for heap as well as appendonly * relations. This allows setting relfrozenxid to correct value * for an appendonly (AO/CO) table. */ vac_strategy = bstrategy; vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared, &OldestXmin, &FreezeLimit); /* * Execute the various vacuum operations. Appendonly tables are treated * differently. 
*/ if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel)) { lazy_vacuum_aorel(onerel, vacstmt, updated_stats); return false; } vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); /* heap relation */ /* Set threshold for interesting free space = average request size */ /* XXX should we scale it up or down? Adjust vacuum.c too, if so */ vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node); vacrelstats->num_index_scans = 0; /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); vacrelstats->hasindex = (nindexes > 0); /* Do the vacuuming */ lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats); /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); /* * Optionally truncate the relation. * * Don't even think about it unless we have a shot at releasing a goodly * number of pages. Otherwise, the time taken isn't worth it. * * Note that after we've truncated the heap, it's too late to abort the * transaction; doing so would lose the sinval messages needed to tell * the other backends about the table being shrunk. We prevent interrupts * in that case; caller is responsible for re-enabling them after * committing the transaction. */ possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; if (possibly_freeable > 0 && (possibly_freeable >= REL_TRUNCATE_MINIMUM || possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)) { HOLD_INTERRUPTS(); heldoff = true; lazy_truncate_heap(onerel, vacrelstats); } /* Update shared free space map with final free space info */ lazy_update_fsm(onerel, vacrelstats); if (vacrelstats->tot_free_pages > MaxFSMPages) ereport(WARNING, (errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space", get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel)), /* Only suggest VACUUM FULL if > 20% free */ (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ? 
errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") : errhint("Consider increasing the configuration parameter \"max_fsm_pages\"."))); /* Update statistics in pg_class */ vac_update_relstats_from_list(onerel, vacrelstats->rel_pages, vacrelstats->rel_tuples, vacrelstats->hasindex, FreezeLimit, updated_stats); /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, true /*vacrelstats->scanned_all*/, vacstmt->analyze, vacrelstats->rel_tuples); if (gp_indexcheck_vacuum == INDEX_CHECK_ALL || (gp_indexcheck_vacuum == INDEX_CHECK_SYSTEM && PG_CATALOG_NAMESPACE == RelationGetNamespace(onerel))) { int i; for (i = 0; i < nindexes; i++) { if (Irel[i]->rd_rel->relam == BTREE_AM_OID) _bt_validate_vacuum(Irel[i], onerel, OldestXmin); } } /* and log the action if appropriate */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { if (Log_autovacuum_min_duration == 0 || TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(), Log_autovacuum_min_duration)) ereport(LOG, (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n" "pages: %d removed, %d remain\n" "tuples: %.0f removed, %.0f remain\n" "system usage: %s", get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel), vacrelstats->num_index_scans, vacrelstats->pages_removed, vacrelstats->rel_pages, vacrelstats->tuples_deleted, vacrelstats->rel_tuples, pg_rusage_show(&ru0)))); } return heldoff; }
/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs have to add the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 *		Insert can be part of an update operation when
 *		there is a preceding SplitUpdate node.
 *
 *		slot	 tuple to insert
 *		dest	 not referenced in this function body
 *		estate	 executor state; supplies the result relation (or the
 *				 partition routing information), the snapshot and the
 *				 pre-assigned append-only segment numbers
 *		planGen	 triggers and constraints are only processed here when
 *				 this is PLANGEN_PLANNER
 *		isUpdate not referenced in this function body
 *
 *		Dispatches on the storage kind of the result relation: heap,
 *		append-only row, append-only column, or external.
 * ----------------------------------------------------------------
 */
void
ExecInsert(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	void	   *tuple = NULL;
	ResultRelInfo *resultRelInfo = NULL;
	Relation	resultRelationDesc = NULL;
	Oid			newId = InvalidOid;
	TupleTableSlot *partslot = NULL;

	AOTupleId	aoTupleId = AOTUPLEID_INIT;

	bool		rel_is_heap = false;
	bool		rel_is_aorows = false;
	bool		rel_is_aocols = false;
	bool		rel_is_external = false;

	/*
	 * get information on the (current) result relation
	 */
	if (estate->es_result_partitions)
	{
		/* Partitioned target: route the tuple to its partition. */
		resultRelInfo = slot_get_partition(slot, estate);

		/* Check whether the user provided the correct leaf part only if required */
		if (!dml_ignore_target_partition_check)
		{
			Assert(NULL != estate->es_result_partitions->part &&
				   NULL != resultRelInfo->ri_RelationDesc);

			List	   *resultRelations = estate->es_plannedstmt->resultRelations;

			/*
			 * Only inheritance can generate multiple result relations and inheritance
			 * is not compatible with partitions. As we are inserting into a partitioned
			 * table, we should not have more than one resultRelation
			 */
			Assert(list_length(resultRelations) == 1);
			/* We only have one resultRelations entry where the user originally intended to insert */
			int			rteIdxForUserRel = linitial_int(resultRelations);

			Assert (rteIdxForUserRel > 0);
			Oid			userProvidedRel = InvalidOid;

			if (1 == rteIdxForUserRel)
			{
				/* Optimization for typical case: read the first range table entry directly */
				userProvidedRel = ((RangeTblEntry *) estate->es_plannedstmt->rtable->head->data.ptr_value)->relid;
			}
			else
			{
				userProvidedRel = getrelid(rteIdxForUserRel, estate->es_plannedstmt->rtable);
			}

			/*
			 * Error out if user provides a leaf partition that does not match
			 * with our calculated partition.  The relation the user named is
			 * acceptable when it is either the partitioned parent or the
			 * partition the tuple was routed to.
			 *
			 * NOTE(review): the "provided partition" name in the errdetail is
			 * read from estate->es_result_relation_info, which is only
			 * reassigned after this check -- confirm it holds the
			 * user-provided relation at this point.
			 */
			if (userProvidedRel != estate->es_result_partitions->part->parrelid &&
				userProvidedRel != resultRelInfo->ri_RelationDesc->rd_id)
			{
				ereport(ERROR,
						(errcode(ERRCODE_CHECK_VIOLATION),
						 errmsg("Trying to insert row into wrong partition"),
						 errdetail("Expected partition: %s, provided partition: %s",
								   resultRelInfo->ri_RelationDesc->rd_rel->relname.data,
								   estate->es_result_relation_info->ri_RelationDesc->rd_rel->relname.data)));
			}
		}
		estate->es_result_relation_info = resultRelInfo;
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}

	Assert (!resultRelInfo->ri_projectReturning);

	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* Classify the target's storage once; the flags drive every branch below. */
	rel_is_heap = RelationIsHeap(resultRelationDesc);
	rel_is_aocols = RelationIsAoCols(resultRelationDesc);
	rel_is_aorows = RelationIsAoRows(resultRelationDesc);
	rel_is_external = RelationIsExternal(resultRelationDesc);

	/*
	 * Obtain the slot to insert from for this result relation, then fetch the
	 * tuple in the form the storage kind takes: heap tuple for heap and
	 * external tables, memtuple for append-only tables.
	 */
	partslot = reconstructMatchingTupleSlot(slot, resultRelInfo);
	if (rel_is_heap)
	{
		tuple = ExecFetchSlotHeapTuple(partslot);
	}
	else if (rel_is_aorows)
	{
		tuple = ExecFetchSlotMemTuple(partslot, false);
	}
	else if (rel_is_external)
	{
		if (estate->es_result_partitions &&
			estate->es_result_partitions->part->parrelid != 0)
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Insert into external partitions not supported.")));
			return;
		}
		else
		{
			tuple = ExecFetchSlotHeapTuple(partslot);
		}
	}
	else
	{
		Assert(rel_is_aocols);
		tuple = ExecFetchSlotMemTuple(partslot, true);
	}

	Assert(partslot != NULL && tuple != NULL);

	/* Execute triggers in Planner-generated plans */
	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW INSERT Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
		{
			HeapTuple	newtuple;

			/* NYI */
			if(rel_is_aocols)
				elog(ERROR, "triggers are not supported on tables that use column-oriented storage");

			newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

			if (newtuple == NULL)	/* "do nothing" */
			{
				return;
			}

			if (newtuple != tuple)	/* modified by Trigger(s) */
			{
				/*
				 * Put the modified tuple into a slot for convenience of routines
				 * below.  We assume the tuple was allocated in per-tuple memory
				 * context, and therefore will go away by itself. The tuple table
				 * slot should not try to clear it.
				 */
				TupleTableSlot *newslot = estate->es_trig_tuple_slot;

				if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor)
					ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor);
				ExecStoreGenericTuple(newtuple, newslot, false);
				newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */
				tuple = newtuple;
				partslot = newslot;
			}
		}
	}

	/*
	 * Check the constraints of the tuple (only for planner-generated plans)
	 */
	if (resultRelationDesc->rd_att->constr &&
		planGen == PLANGEN_PLANNER)
	{
		ExecConstraints(resultRelInfo, partslot, estate);
	}

	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * NOTE: for append-only relations we use the append-only access methods.
	 * Their insert descriptors are created on first use and cached in
	 * resultRelInfo.
	 */
	if (rel_is_aorows)
	{
		if (resultRelInfo->ri_aoInsertDesc == NULL)
		{
			/* Set the pre-assigned fileseg number to insert into */
			ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);

			resultRelInfo->ri_aoInsertDesc =
				appendonly_insert_init(resultRelationDesc,
									   ActiveSnapshot,
									   resultRelInfo->ri_aosegno,
									   false);
		}

		appendonly_insert(resultRelInfo->ri_aoInsertDesc, tuple, &newId, &aoTupleId);
	}
	else if (rel_is_aocols)
	{
		if (resultRelInfo->ri_aocsInsertDesc == NULL)
		{
			/* Set the pre-assigned fileseg number to insert into */
			ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);
			resultRelInfo->ri_aocsInsertDesc = aocs_insert_init(resultRelationDesc,
																resultRelInfo->ri_aosegno, false);
		}

		newId = aocs_insert(resultRelInfo->ri_aocsInsertDesc, partslot);
		/* the tuple id used for index maintenance is read back from the slot */
		aoTupleId = *((AOTupleId*)slot_get_ctid(partslot));
	}
	else if (rel_is_external)
	{
		/* Writable external table */
		if (resultRelInfo->ri_extInsertDesc == NULL)
			resultRelInfo->ri_extInsertDesc = external_insert_init(resultRelationDesc);

		newId = external_insert(resultRelInfo->ri_extInsertDesc, tuple);
	}
	else
	{
		Insist(rel_is_heap);

		newId = heap_insert(resultRelationDesc,
							tuple,
							estate->es_snapshot->curcid,
							true, true, GetCurrentTransactionId());
	}

	/* Bookkeeping; note ri_aoprocessed is bumped for every storage kind. */
	IncrAppended();
	(estate->es_processed)++;
	(resultRelInfo->ri_aoprocessed)++;
	estate->es_lastoid = newId;

	partslot->tts_tableOid = RelationGetRelid(resultRelationDesc);

	if (rel_is_aorows || rel_is_aocols)
	{
		/*
		 * insert index entries for the append-only (row or column oriented)
		 * tuple, addressed by its AO tuple id
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false);
	}
	else
	{
		/* Use parttuple for index update in case this is an indexed heap table. */
		TupleTableSlot *xslot = partslot;
		void	   *xtuple = tuple;

		setLastTid(&(((HeapTuple) xtuple)->t_self));

		/*
		 * insert index entries for tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(xslot, &(((HeapTuple) xtuple)->t_self), estate, false);
	}

	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW INSERT Triggers */
		ExecARInsertTriggers(estate, resultRelInfo, tuple);
	}
}
/*
 * AlterTableCreateAoSegTable
 *
 * Creates the segment-file auxiliary table (relkind RELKIND_AOSEGMENTS) for
 * an append-only relation.  Row-oriented relations get a "pg_aoseg" table,
 * column-oriented ones a "pg_aocsseg" table; for any other relation this is
 * a no-op.
 *
 * relOid			relation to create the auxiliary table for
 * is_part_child	when true the relation is opened without taking a lock
 * is_part_parent	passed through to CreateAOAuxiliaryTable()
 */
void
AlterTableCreateAoSegTable(Oid relOid, bool is_part_child, bool is_part_parent)
{
	/* One attribute of an auxiliary segment table. */
	struct SegColumn
	{
		const char *name;
		Oid			typid;
	};

	/* pg_aoseg layout for row-oriented relations */
	static const struct SegColumn rowSegColumns[] = {
		{"segno", INT4OID},
		{"eof", INT8OID},
		{"tupcount", INT8OID},
		{"varblockcount", INT8OID},
		{"eofuncompressed", INT8OID},
		{"modcount", INT8OID},
		{"formatversion", INT2OID},
		{"state", INT2OID}
	};

	/*
	 * pg_aocsseg layout for column-oriented relations.
	 *
	 * XXX The aocs info is hardwired: total vertical partitioning is
	 * assumed, with no datatype specific compression.  To make things right,
	 * DefineRelation must first store the per column info, which would then
	 * be pulled out of the catalog here.  Add/drop column is not handled
	 * nicely yet either.
	 *
	 *		segno			int4	 -- whatever purpose ao use it
	 *		tupcount		int8	 -- total tup
	 *		varblockcount	int8	 -- total varblock
	 *		vpinfo			bytea	 -- vertical partition info encoded in
	 *									binary.  NEEDS TO BE REFACTORED
	 *									INTO MULTIPLE COLUMNS!!
	 *		modcount		int8
	 *		formatversion	int2
	 *		state			int2	 -- state of the segment file
	 */
	static const struct SegColumn colSegColumns[] = {
		{"segno", INT4OID},
		{"tupcount", INT8OID},
		{"varblockcount", INT8OID},
		{"vpinfo", BYTEAOID},
		{"modcount", INT8OID},
		{"formatversion", INT2OID},
		{"state", INT2OID}
	};

	const struct SegColumn *columns;
	int			ncolumns;
	int			attidx;
	const char *auxPrefix;
	TupleDesc	segTupdesc;
	Relation	targetRel;

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)  Partition children are opened without taking a lock.
	 */
	targetRel = heap_open(relOid,
						  is_part_child ? NoLock : AccessExclusiveLock);

	if (RelationIsAoRows(targetRel))
	{
		auxPrefix = "pg_aoseg";
		columns = rowSegColumns;
		ncolumns = (int) (sizeof(rowSegColumns) / sizeof(rowSegColumns[0]));
	}
	else if (RelationIsAoCols(targetRel))
	{
		auxPrefix = "pg_aocsseg";
		columns = colSegColumns;
		ncolumns = (int) (sizeof(colSegColumns) / sizeof(colSegColumns[0]));
	}
	else
	{
		/* Not append-only: nothing to create. */
		heap_close(targetRel, NoLock);
		return;
	}

	/* this is pretty painful...  need a tuple descriptor */
	segTupdesc = CreateTemplateTupleDesc(ncolumns, false);
	for (attidx = 0; attidx < ncolumns; attidx++)
	{
		TupleDescInitEntry(segTupdesc,
						   (AttrNumber) (attidx + 1),
						   columns[attidx].name,
						   columns[attidx].typid,
						   -1, 0);
	}

	(void) CreateAOAuxiliaryTable(targetRel, auxPrefix, RELKIND_AOSEGMENTS,
								  segTupdesc, NULL, NIL, NULL, NULL,
								  is_part_parent);

	/* Close the relation but keep whatever lock was taken above. */
	heap_close(targetRel, NoLock);
}
/*
 * AOCSTruncateToEOF
 *
 * Truncates every segment file of the AOCS relation 'aorel' to its EOF.
 *
 * A segment file whose lock cannot be obtained without waiting (because of
 * e.g. a concurrent insert) is skipped.  Raises an ERROR if the segment file
 * information of a locked segment cannot be re-read.
 */
void
AOCSTruncateToEOF(Relation aorel)
{
	Snapshot	metaSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));
	AOCSFileSegInfo **allSegInfos;
	int			numSegInfos;
	const char *relname;
	int			idx;

	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	allSegInfos = GetAllAOCSFileSegInfo(aorel, metaSnapshot, &numSegInfos);

	for (idx = 0; idx < numSegInfos; idx++)
	{
		int			segno = allSegInfos[idx]->segno;
		LockAcquireResult lockResult;
		AOCSFileSegInfo *lockedSegInfo;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file, without waiting for it.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		lockResult = LockRelationAppendOnlySegmentFile(&aorel->rd_node,
													   segno,
													   AccessExclusiveLock,
													   true /* dontWait */ );
		if (lockResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "truncate skips AO segfile %d, "
				 "relation %s", segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		lockedSegInfo = GetAOCSFileSegInfo(aorel, metaSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (lockedSegInfo == NULL)
		{
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);
		}

		AOCSSegmentFileTruncateToEOF(aorel, lockedSegInfo);
		pfree(lockedSegInfo);
	}

	if (allSegInfos != NULL)
	{
		FreeAllAOCSSegFileInfo(allSegInfos, numSegInfos);
		pfree(allSegInfos);
	}

	UnregisterSnapshot(metaSnapshot);
}