/*
 * Returns true if the relation has no tuples.  Prepare phase of
 * compaction invokes this function on each QE.
 *
 * Examples of empty tables:
 * 1. parent of a partitioned table
 * 2. table that is created but no tuples have been inserted yet
 * 3. table from which all existing tuples are deleted and the table
 *    is vacuumed.  This is a special case in which pg_aoseg_<oid> has
 *    non-zero number of rows but tupcount value is zero for all rows.
 */
bool
AppendOnlyCompaction_IsRelationEmpty(Relation aorel)
{
	AppendOnlyEntry *aoEntry;
	Relation	pg_aoseg_rel;
	TupleDesc	pg_aoseg_dsc;
	HeapTuple	tuple;
	HeapScanDesc aoscan;
	int			Anum_tupcount;
	bool		isNull;
	bool		empty = true;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);
	pg_aoseg_rel = heap_open(aoEntry->segrelid, AccessShareLock);
	pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel);
	aoscan = heap_beginscan(pg_aoseg_rel, SnapshotNow, 0, NULL);

	/* Row-oriented and column-oriented segment catalogs differ only in
	 * which attribute holds the tuple count. */
	Anum_tupcount = RelationIsAoRows(aorel) ?
		Anum_pg_aoseg_tupcount : Anum_pg_aocs_tupcount;

	while ((tuple = heap_getnext(aoscan, ForwardScanDirection)) != NULL &&
		   empty)
	{
		/*
		 * Fix: pass a real isnull flag instead of NULL — the fastgetattr
		 * macro stores through its isnull argument unconditionally.
		 *
		 * NOTE(review): the raw Datum is compared against 0 directly;
		 * this assumes tupcount is a pass-by-value integer column —
		 * confirm against the pg_aoseg catalog definition.
		 */
		if (0 < fastgetattr(tuple, Anum_tupcount, pg_aoseg_dsc, &isNull))
			empty = false;
	}

	heap_endscan(aoscan);
	heap_close(pg_aoseg_rel, AccessShareLock);

	/* Fix: aoEntry was leaked; every other caller in this file pfrees it. */
	pfree(aoEntry);

	return empty;
}
/**
 * Initialize and get ready for inserting values into the parquet table.
 *
 * Allocates and fills a ParquetInsertDescData in the current memory
 * context and opens the target segment file for write.  In the code
 * below a valid descriptor is always returned (allocation failures
 * elog out of palloc0).
 *
 * @rel          the relation to insert into
 * @segfileinfo  segment-file info; segfileinfo->segno selects the
 *               segment file to write (must be >= 0)
 */
ParquetInsertDesc parquet_insert_init(Relation rel, ResultRelSegFileInfo *segfileinfo)
{
	ParquetInsertDesc parquetInsertDesc = NULL;
	AppendOnlyEntry *aoentry = NULL;
	MemoryContext oldMemoryContext = NULL;
	StringInfoData titleBuf;
	int relNameLen = 0;

	/*
	 * Get the pg_appendonly information for this table
	 */
	aoentry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);

	/* The parquet entry must exist and be at the right version.  The
	 * version number is hard coded when the entry is created. */
	Assert(aoentry != NULL);
	Assert(aoentry->majorversion == 1 && aoentry->minorversion == 0);

	parquetInsertDesc = (ParquetInsertDesc) palloc0(sizeof(ParquetInsertDescData));

	/* All descriptor sub-allocations below live in this context. */
	parquetInsertDesc->memoryContext = CurrentMemoryContext;
	oldMemoryContext = MemoryContextSwitchTo(parquetInsertDesc->memoryContext);

	parquetInsertDesc->parquet_rel = rel;

	/* Private copy of the relation name (NUL-terminated via palloc0). */
	relNameLen = strlen(rel->rd_rel->relname.data);
	parquetInsertDesc->relname = (char*)palloc0(relNameLen + 1);
	memcpy(parquetInsertDesc->relname, rel->rd_rel->relname.data, relNameLen);

	parquetInsertDesc->parquetMetaDataSnapshot = SnapshotNow;

	/* -1 marks "no file open yet"; SetCurrentFileSegForWrite opens it. */
	parquetInsertDesc->parquet_file = -1;
	parquetInsertDesc->parquetFilePathNameMaxLen = AOSegmentFilePathNameLen(rel) + 1;
	parquetInsertDesc->parquetFilePathName = (char*) palloc0(parquetInsertDesc->parquetFilePathNameMaxLen);
	parquetInsertDesc->parquetFilePathName[0] = '\0';
	parquetInsertDesc->footerProtocol = NULL;

	Assert(segfileinfo->segno >= 0);
	parquetInsertDesc->cur_segno = segfileinfo->segno;
	parquetInsertDesc->aoEntry = aoentry;
	parquetInsertDesc->insertCount = 0;

	/* Human-readable title used in error/status reporting. */
	initStringInfo(&titleBuf);
	appendStringInfo(&titleBuf, "Write of Parquet relation '%s'",
					 RelationGetRelationName(parquetInsertDesc->parquet_rel));
	parquetInsertDesc->title = titleBuf.data;

	/* Mirrored-open bookkeeping starts in the "inactive / no file" state. */
	parquetInsertDesc->mirroredOpen =
		(MirroredAppendOnlyOpen *) palloc0(sizeof(MirroredAppendOnlyOpen));
	parquetInsertDesc->mirroredOpen->isActive = FALSE;
	parquetInsertDesc->mirroredOpen->segmentFileNum = 0;
	parquetInsertDesc->mirroredOpen->primaryFile = -1;
	parquetInsertDesc->previous_rowgroupcnt = 0;

	/* open our current relation file segment for write */
	SetCurrentFileSegForWrite(parquetInsertDesc, segfileinfo);

	/* Allocation is done.  Go back to caller memory-context. */
	MemoryContextSwitchTo(oldMemoryContext);

	return parquetInsertDesc;
}
/*
 * Estimate reltuples and relpages for a row-oriented append-only relation.
 *
 * The tuple count comes from the segment-file totals minus the tuples
 * hidden by the visibility map; the page count is a guess derived from
 * the total on-disk bytes, because the planner does not understand AO
 * block layout.
 */
static void
gp_statistics_estimate_reltuples_relpages_ao_rows(Relation rel, float4 *reltuples, float4 *relpages)
{
	FileSegTotals	   *totals;
	AppendOnlyEntry	   *entry;
	AppendOnlyVisimap	visiMap;
	int64				hiddenTuples;

	/* Only a plain relation with row-oriented AO storage is acceptable here. */
	Assert(rel->rd_rel->relkind == RELKIND_RELATION);
	Assert(RelationIsAoRows(rel));

	totals = GetSegFilesTotals(rel, SnapshotNow);
	Assert(totals);

	/* Fudge a block count from total bytes for the planner's benefit. */
	*relpages = RelationGuessNumberOfBlocks((double) totals->totalbytes);

	/* Subtract tuples hidden by the visibility map. */
	entry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
	AppendOnlyVisimap_Init(&visiMap,
						   entry->visimaprelid,
						   entry->visimapidxid,
						   AccessShareLock,
						   SnapshotNow);
	hiddenTuples = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visiMap);
	AppendOnlyVisimap_Finish(&visiMap, AccessShareLock);

	/* AO tuple counts are exact, so use them directly. */
	*reltuples = (double) (totals->totaltuples - hiddenTuples);

	pfree(totals);
	pfree(entry);
}
/*
 * Fills in the relation statistics for an append-only relation.
 *
 * This information is used to update the reltuples and relpages information
 * in pg_class.  reltuples is the same as "pg_aoseg_<oid>:tupcount"
 * column and we simulate relpages by subdividing the eof value
 * ("pg_aoseg_<oid>:eof") over the defined page size.
 *
 * Outputs: *rel_pages (guessed block count), *rel_tuples (visible tuple
 * count), *relhasindex (copied from pg_class).
 */
void vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot, BlockNumber *rel_pages, double *rel_tuples, bool *relhasindex)
{
	FileSegTotals *fstotal;
	BlockNumber nblocks;
	char	   *relname;
	double		num_tuples;
	double		totalbytes;
	double		eof;
	int64		hidden_tupcount;
	AppendOnlyVisimap visimap;
	AppendOnlyEntry *aoEntry;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	/* get updated statistics from the pg_aoseg table */
	if (RelationIsAoRows(aorel))
	{
		fstotal = GetSegFilesTotals(aorel, snapshot);
	}
	else
	{
		Assert(RelationIsAoCols(aorel));
		fstotal = GetAOCSSSegFilesTotals(aorel, snapshot);
	}

	/* calculate the values we care about */
	eof = (double)fstotal->totalbytes;
	num_tuples = (double)fstotal->totaltuples;
	totalbytes = eof;
	nblocks = (uint32)RelationGuessNumberOfBlocks(totalbytes);

	/* Subtract tuples hidden by the visibility map from the raw count. */
	aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), snapshot);
	AppendOnlyVisimap_Init(&visimap,
						   aoEntry->visimaprelid,
						   aoEntry->visimapidxid,
						   AccessShareLock,
						   snapshot);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	num_tuples -= hidden_tupcount;
	Assert(num_tuples > -1.0);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);
	pfree(aoEntry);

	elogif (Debug_appendonly_print_compaction, LOG,
			"Gather statistics after vacuum for append-only relation %s: "
			"page count %d, tuple count %f",
			relname,
			nblocks, num_tuples);

	*rel_pages = nblocks;
	*rel_tuples = num_tuples;
	*relhasindex = aorel->rd_rel->relhasindex;

	/* NOTE(review): 'elevel' is not declared in this function — presumably
	 * a file-scope vacuum verbosity variable; confirm in the full file. */
	ereport(elevel,
			(errmsg("\"%s\": found %.0f rows in %u pages.",
					relname, num_tuples, nblocks)));
	pfree(fstotal);
}
/*
 * Estimate reltuples and relpages for a column-oriented append-only relation.
 *
 * Sums the uncompressed EOF of every column in every segment file to guess
 * a page count, and sums per-segment tuple counts (skipping segments that
 * are awaiting drop), then subtracts tuples hidden by the visibility map.
 */
static void
gp_statistics_estimate_reltuples_relpages_ao_cs(Relation rel, float4 *reltuples, float4 *relpages)
{
	AOCSFileSegInfo **aocsInfo = NULL;
	int			nsegs = 0;
	double		totalBytes = 0;
	AppendOnlyEntry *aoEntry;
	int64		hidden_tupcount;
	AppendOnlyVisimap visimap;

	/**
	 * Ensure that the right kind of relation with the right type of storage is passed to us.
	 */
	Assert(rel->rd_rel->relkind == RELKIND_RELATION);
	Assert(RelationIsAoCols(rel));

	*reltuples = 0.0;
	*relpages = 0.0;

	/* get table level statistics from the pg_aoseg table */
	aoEntry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
	aocsInfo = GetAllAOCSFileSegInfo(rel, aoEntry, SnapshotNow, &nsegs);
	if (aocsInfo)
	{
		int			i;
		int			j;

		for (i = 0; i < nsegs; i++)
		{
			for (j = 0; j < RelationGetNumberOfAttributes(rel); j++)
			{
				AOCSVPInfoEntry *e = getAOCSVPEntry(aocsInfo[i], j);

				Assert(e);
				totalBytes += e->eof_uncompressed;
			}

			/* Do not include tuples from an awaiting drop segment file */
			if (aocsInfo[i]->state != AOSEG_STATE_AWAITING_DROP)
			{
				*reltuples += aocsInfo[i]->total_tupcount;
			}
		}

		/**
		 * The planner doesn't understand AO's blocks, so need this method to
		 * try to fudge up a number for the planner.
		 */
		*relpages = RelationGuessNumberOfBlocks(totalBytes);

		/*
		 * Fix: the seg-info array was leaked before.  Release it the same
		 * way the compaction code releases FileSegInfo arrays.
		 */
		FreeAllAOCSSegFileInfo(aocsInfo, nsegs);
		pfree(aocsInfo);
	}

	AppendOnlyVisimap_Init(&visimap,
						   aoEntry->visimaprelid,
						   aoEntry->visimapidxid,
						   AccessShareLock,
						   SnapshotNow);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	(*reltuples) -= hidden_tupcount;

	pfree(aoEntry);
}
/*
 * Return the configured block size for an append-only/parquet relation,
 * as recorded in its pg_appendonly entry.
 */
uint32
RelationGetRelationBlocksize(Relation rel)
{
	AppendOnlyEntry *aoentry;
	uint32		blocksize;

	aoentry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
	blocksize = aoentry->blocksize;

	/* Fix: the entry was leaked on every call; release it like the
	 * other GetAppendOnlyEntry callers in this file. */
	pfree(aoentry);

	return blocksize;
}
/*
 * GetParquetSegFilesTotals
 *
 * Get the total bytes and tuples for a specific parquet table
 * from the pg_aoseg table on this local segdb.
 *
 * Returns a palloc'd ParquetFileSegTotals the caller must free.
 * totalbytesuncompressed is set to InvalidUncompressedEof when any
 * segment row has a NULL uncompressed EOF (the total is then unknown).
 */
ParquetFileSegTotals *GetParquetSegFilesTotals(Relation parentrel, Snapshot parquetMetaDataSnapshot)
{
	Relation	pg_paqseg_rel;
	TupleDesc	pg_paqseg_dsc;
	HeapTuple	tuple;
	SysScanDesc	paqscan;
	ParquetFileSegTotals *result;
	Datum		eof,
				eof_uncompressed,
				tupcount;
	bool		isNull;
	bool		uncompressedValid = true;	/* false once any NULL is seen */
	AppendOnlyEntry *aoEntry = NULL;

	Assert(RelationIsParquet(parentrel));

	aoEntry = GetAppendOnlyEntry(RelationGetRelid(parentrel), parquetMetaDataSnapshot);

	result = (ParquetFileSegTotals *) palloc0(sizeof(ParquetFileSegTotals));

	pg_paqseg_rel = heap_open(aoEntry->segrelid, AccessShareLock);
	pg_paqseg_dsc = RelationGetDescr(pg_paqseg_rel);

	paqscan = systable_beginscan(pg_paqseg_rel, InvalidOid, FALSE,
								 parquetMetaDataSnapshot, 0, NULL);

	while (HeapTupleIsValid(tuple = systable_getnext(paqscan)))
	{
		eof = fastgetattr(tuple, Anum_pg_parquetseg_eof,
						  pg_paqseg_dsc, &isNull);
		tupcount = fastgetattr(tuple, Anum_pg_parquetseg_tupcount,
							   pg_paqseg_dsc, &isNull);
		eof_uncompressed = fastgetattr(tuple, Anum_pg_parquetseg_eofuncompressed,
									   pg_paqseg_dsc, &isNull);

		/*
		 * Fix: previously a NULL eof_uncompressed stored the sentinel
		 * InvalidUncompressedEof directly into the running total, and a
		 * later non-NULL row would then add to the sentinel, corrupting it.
		 * Track validity separately and write the sentinel once at the end.
		 */
		if (isNull)
			uncompressedValid = false;
		else if (uncompressedValid)
			result->totalbytesuncompressed += (int64)DatumGetFloat8(eof_uncompressed);

		result->totalbytes += (int64)DatumGetFloat8(eof);
		result->totaltuples += (int64)DatumGetFloat8(tupcount);
		result->totalfilesegs++;

		CHECK_FOR_INTERRUPTS();
	}

	if (!uncompressedValid)
		result->totalbytesuncompressed = InvalidUncompressedEof;

	systable_endscan(paqscan);
	heap_close(pg_paqseg_rel, AccessShareLock);
	pfree(aoEntry);

	return result;
}
/**
 * Given a parquet table oid, return the memory reserved for the parquet
 * table insert operator.
 *
 * For an uncompressed table the whole rowgroup is buffered in memory
 * before being written to disk, so one rowgroup-size quota suffices.
 * For a compressed table there is additionally a page buffer per column
 * holding the original uncompressed data, so the worst case is twice
 * the rowgroup size.
 *
 * @rel_oid  oid of the relation to be inserted into
 * @return   memory (bytes) reserved for this table's insert
 */
uint64 memReservedForParquetInsert(Oid rel_oid)
{
	AppendOnlyEntry *entry = GetAppendOnlyEntry(rel_oid, SnapshotNow);
	uint64		reserved = entry->blocksize;	/* one rowgroup */
	char	   *ctype = entry->compresstype;

	/* Compressed tables need double the quota (see header comment). */
	if (ctype != NULL && strcmp(ctype, "none") != 0)
		reserved *= 2;

	pfree(entry);

	return reserved;
}
/*
 * Has the same signature as RelationGetAttributeCompressionFuncs() even though
 * we don't actually need the full Relation data structure.  I deem consistency
 * of API more important in this case.
 *
 * For non-AO/non-parquet relations the compression type is NULL, which
 * get_funcs_for_compression must accept.
 */
PGFunction *
RelationGetRelationCompressionFuncs(Relation rel)
{
	AppendOnlyEntry *aoentry = NULL;
	char	   *comptype = NULL;
	PGFunction *compFuncs;

	if (RelationIsAoRows(rel) || RelationIsParquet(rel))
	{
		aoentry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
		comptype = aoentry->compresstype;
	}

	compFuncs = get_funcs_for_compression(comptype);

	/*
	 * Fix: the entry was leaked.  Free it only after the lookup, since
	 * comptype points into the entry.  NOTE(review): assumes
	 * get_funcs_for_compression does not retain the comptype pointer —
	 * confirm against its implementation.
	 */
	if (aoentry != NULL)
		pfree(aoentry);

	return compFuncs;
}
/*
 * Compute the on-disk size of files for the relation according to the
 * stat function, including heap data, index data, toast data, aoseg data,
 * aoblkdir data, and aovisimap data.
 */
static int64
calculate_total_relation_size(Oid Relid)
{
	Relation	heapRel;
	Oid			toastOid;
	AppendOnlyEntry *aoEntry = NULL;
	int64		size;
	ListCell   *cell;

	/* Open without error on missing relation; report size 0 instead. */
	heapRel = try_relation_open(Relid, AccessShareLock, false);

	if (!RelationIsValid(heapRel))
		return 0;

	toastOid = heapRel->rd_rel->reltoastrelid;

	/* AO relations carry auxiliary catalogs (aoseg/aoblkdir/visimap),
	 * located via the pg_appendonly entry. */
	if (RelationIsAoRows(heapRel) || RelationIsAoCols(heapRel))
		aoEntry = GetAppendOnlyEntry(Relid, SnapshotNow);

	/* Get the heap size */
	if (Relid == 0 || heapRel->rd_node.relNode == 0)
		size = 0;
	else
		size = calculate_relation_size(heapRel);

	/* Include any dependent indexes */
	if (heapRel->rd_rel->relhasindex)
	{
		List	   *index_oids = RelationGetIndexList(heapRel);

		foreach(cell, index_oids)
		{
			Oid			idxOid = lfirst_oid(cell);
			Relation	iRel;

			/* Index may have been dropped concurrently; skip if so. */
			iRel = try_relation_open(idxOid, AccessShareLock, false);
			if (RelationIsValid(iRel))
			{
				size += calculate_relation_size(iRel);
				relation_close(iRel, AccessShareLock);
			}
		}
	/*
	 * NOTE(review): this definition is truncated by the chunk boundary —
	 * the toast/AO-auxiliary size accumulation, aoEntry cleanup, and
	 * relation_close presumably follow but are not visible here.
	 */
/*
 * GetParquetTotalBytes
 *
 * Get the total bytes for a specific parquet table from the pg_aoseg table
 * on master.
 *
 * In hawq, the master keeps all segfile info in the pg_aoseg table,
 * therefore it gets the whole table size.
 */
int64 GetParquetTotalBytes(Relation parentrel, Snapshot parquetMetaDataSnapshot)
{
	Relation	segrel;
	TupleDesc	segdesc;
	HeapTuple	segtup;
	SysScanDesc	segscan;
	int64		totalbytes = 0;
	bool		isNull;
	AppendOnlyEntry *entry;

	entry = GetAppendOnlyEntry(RelationGetRelid(parentrel),
							   parquetMetaDataSnapshot);

	segrel = heap_open(entry->segrelid, AccessShareLock);
	segdesc = RelationGetDescr(segrel);

	/* Master-only path: executors keep no whole-table segfile info. */
	Assert (Gp_role != GP_ROLE_EXECUTE);

	segscan = systable_beginscan(segrel, InvalidOid, FALSE,
								 parquetMetaDataSnapshot, 0, NULL);

	while (HeapTupleIsValid(segtup = systable_getnext(segscan)))
	{
		Datum		eof;

		eof = fastgetattr(segtup, Anum_pg_parquetseg_eof, segdesc, &isNull);
		Assert(!isNull);

		totalbytes += (int64)DatumGetFloat8(eof);

		CHECK_FOR_INTERRUPTS();
	}

	systable_endscan(segscan);
	heap_close(segrel, AccessShareLock);
	pfree(entry);

	return totalbytes;
}
/**
 * Begin a scan of a parquet relation.
 *
 * Allocates and initializes a ParquetScanDesc: looks up the
 * pg_appendonly entry, fills in the storage attributes (compression,
 * checksum, version, ...), and calls initscan().  The relation's
 * refcount is incremented for the lifetime of the scan.
 *
 * @relation                relation to scan (caller holds it open)
 * @parquetMetaDataSnapshot snapshot for reading parquet metadata
 * @relationTupleDesc       tuple descriptor to use, or NULL to use the
 *                          relation's own descriptor
 * @proj                    per-attribute projection flags
 */
ParquetScanDesc
parquet_beginscan(
		Relation relation,
		Snapshot parquetMetaDataSnapshot,
		TupleDesc relationTupleDesc,
		bool *proj)
{
	ParquetScanDesc scan;
	AppendOnlyEntry *aoEntry;

	AppendOnlyStorageAttributes *attr;

	/*
	 * increment relation ref count while scanning relation
	 *
	 * This is just to make really sure the relcache entry won't go away while
	 * the scan has a pointer to it.  Caller should be holding the rel open
	 * anyway, so this is redundant in all normal scenarios...
	 */
	RelationIncrementReferenceCount(relation);

	/* allocate scan descriptor */
	scan = (ParquetScanDescData *)palloc0(sizeof(ParquetScanDescData));

	/*
	 * Get the pg_appendonly information for this table
	 */
	aoEntry = GetAppendOnlyEntry(RelationGetRelid(relation), parquetMetaDataSnapshot);
	scan->aoEntry = aoEntry;
	/* The version number is hard coded when the entry is created. */
	Assert(aoEntry->majorversion == 1 && aoEntry->minorversion == 0);

#ifdef FAULT_INJECTOR
	FaultInjector_InjectFaultIfSet(
			FailQeWhenBeginParquetScan,
			DDLNotSpecified,
			"",	// databaseName
			""); // tableName
#endif

	/*
	 * initialize the scan descriptor
	 */
	scan->pqs_filenamepath_maxlen = AOSegmentFilePathNameLen(relation) + 1;
	scan->pqs_filenamepath = (char*)palloc0(scan->pqs_filenamepath_maxlen);
	scan->pqs_rd = relation;
	scan->parquetScanInitContext = CurrentMemoryContext;

	/*
	 * Fill in Parquet Storage layer attributes.
	 */
	attr = &scan->storageAttributes;

	/*
	 * These attributes describe the AppendOnly format to be scanned.
	 */
	if (aoEntry->compresstype == NULL || pg_strcasecmp(aoEntry->compresstype, "none") == 0)
		attr->compress = false;
	else
		attr->compress = true;
	if (aoEntry->compresstype != NULL)
		attr->compressType = aoEntry->compresstype;
	else
		attr->compressType = "none";
	attr->compressLevel = aoEntry->compresslevel;
	attr->checksum = aoEntry->checksum;
	attr->safeFSWriteSize = aoEntry->safefswritesize;
	attr->splitsize = aoEntry->splitsize;
	attr->version = aoEntry->version;

	AORelationVersion_CheckValid(attr->version);

	scan->proj = proj;

	/* Fall back to the relation's own descriptor when none is supplied. */
	scan->pqs_tupDesc = (relationTupleDesc == NULL) ? RelationGetDescr(relation) : relationTupleDesc;

	scan->hawqAttrToParquetColChunks = (int*)palloc0(scan->pqs_tupDesc->natts * sizeof(int));

	initscan(scan);

	return scan;
}
/*
 * Estimate the memory reserved for scanning a parquet relation.
 *
 * Gathers the rowgroup size and compression type from pg_appendonly,
 * estimates each attribute's width from pg_attribute (adjusted with
 * pg_statistic average widths for variable-length types), and sums the
 * widths of the to-be-scanned attributes.
 *
 * @rel_oid    oid of the relation to be scanned
 * @attr_list  list of attribute numbers (1-based) to be scanned
 */
uint64 memReservedForParquetScan(Oid rel_oid, List* attr_list)
{
	uint64		rowgroupsize = 0;
	char	   *compresstype = NULL;
	uint64		memReserved = 0;

	int			attrNum = get_relnatts(rel_oid); /* the total attribute number of the relation */
	uint64		attsWidth = 0;		/* the sum width of attributes to be scanned */
	uint64		recordWidth = 0;	/* the average width of one record in the relation */
	/* The width array for all the attributes in the relation */
	int32	   *attWidth = (int32*)palloc0(attrNum * sizeof(int32));

	/* The variables for traversing through attribute list */
	ListCell   *cell;

	/* Get rowgroup size and compress type */
	AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(rel_oid, SnapshotNow);
	rowgroupsize = aoEntry->blocksize;
	compresstype = aoEntry->compresstype;

	/*
	 * For each column in the relation, get the column width:
	 * 1) Estimate the width from pg_attribute: attlen for fixed-width
	 *    columns, typmod-based otherwise.
	 * 2) For variable-length types, prefer the average width recorded in
	 *    pg_statistic (stawidth) when it is non-zero.
	 */
	for(int i = 0; i < attrNum; i++){
		int att_id = i + 1;
		HeapTuple attTuple = caql_getfirst(NULL,
				cql("SELECT * FROM pg_attribute"
					" WHERE attrelid = :1 "
					" AND attnum = :2 ",
					ObjectIdGetDatum(rel_oid),
					Int16GetDatum(att_id)));

		if (HeapTupleIsValid(attTuple)) {
			/* Step 1: estimate attwidth according to pg_attribute */
			Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attTuple);
			/* NOTE(review): estimateColumnWidth takes &i and is followed by
			 * i-- — it appears to advance i itself; confirm its contract. */
			estimateColumnWidth(attWidth, &i, att, false);
			i--;

			int32 stawidth = 0;
			/* Step 2: adjust attwidth according to pg_statistic */
			switch (att->atttypid)
			{
				case HAWQ_TYPE_VARCHAR:
				case HAWQ_TYPE_TEXT:
				case HAWQ_TYPE_XML:
				case HAWQ_TYPE_PATH:
				case HAWQ_TYPE_POLYGON:
					stawidth = get_attavgwidth(rel_oid, att_id);
					if(stawidth != 0)
						attWidth[i] = stawidth;
					break;
				case HAWQ_TYPE_VARBIT:
					stawidth = get_attavgwidth(rel_oid, att_id);
					if(stawidth != 0)
						attWidth[i] = stawidth + 4;
					break;
				default:
					break;
			}
		}
		recordWidth += attWidth[i];
	}

	/* Traverse the to-be-scanned attribute list, sum up the widths */
	Assert (1 <= list_length(attr_list));
	foreach(cell, attr_list)
	{
		AttrNumber att_id = lfirst_int(cell);
		Assert(1 <= att_id);
		Assert(att_id <= attrNum);
		attsWidth += attWidth[att_id - 1];	/* sum up the attribute width in the to-be-scanned list */
	}
	/*
	 * NOTE(review): this definition is truncated by the chunk boundary —
	 * the memReserved computation from rowgroupsize/compresstype and the
	 * return statement are not visible here.
	 */
/*
 * Performs a compaction of an append-only relation.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.  If
 * set, the insert_segno should also be marked as in-use.
 * When the insert segno is negative, only truncate to eof operations
 * can be executed.
 *
 * The caller is required to hold either an AccessExclusiveLock (vacuum full)
 * or a ShareLock on the relation.
 *
 * @aorel             relation to compact
 * @compaction_segno  list of segment numbers eligible for compaction
 * @insert_segno      segment file receiving surviving tuples (>= 0 here)
 * @isFull            full-vacuum mode flag, forwarded to ShouldCompact
 */
void AppendOnlyCompact(Relation aorel, List* compaction_segno, int insert_segno, bool isFull)
{
	const char* relname;
	int total_segfiles;
	FileSegInfo** segfile_array;
	AppendOnlyInsertDesc insertDesc = NULL;
	int i, segno;
	FileSegInfo* fsinfo;

	Assert (Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(insert_segno >= 0);

	relname = RelationGetRelationName(aorel);
	AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);

	elogif (Debug_appendonly_print_compaction, LOG,
			"Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllFileSegInfo(aorel, aoEntry, SnapshotNow, &total_segfiles);

	insertDesc = appendonly_insert_init(aorel, SnapshotNow,
			insert_segno, false);

	for(i = 0 ; i < total_segfiles ; i++)
	{
		segno = segfile_array[i]->segno;
		/* Skip segments not selected for compaction. */
		if (list_find_int(compaction_segno, segno) < 0)
		{
			continue;
		}
		if (segno == insert_segno)
		{
			/* We cannot compact the segment file we are inserting to. */
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until commit / abort.
		 */
		LockRelationAppendOnlySegmentFile(
				&aorel->rd_node,
				segfile_array[i]->segno,
				AccessExclusiveLock,
				false);

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetFileSegInfo(aorel, aoEntry, SnapshotNow, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting
		 * (possibly index corruption?)
		 */
		if (fsinfo == NULL)
			elog(ERROR,
				 "file seginfo for AO relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		if (AppendOnlyCompaction_ShouldCompact(aorel, aoEntry,
				fsinfo->segno, fsinfo->total_tupcount, isFull))
		{
			AppendOnlySegmentFileFullCompaction(aorel, aoEntry,
					insertDesc, fsinfo);
		}
		pfree(fsinfo);
	}

	appendonly_insert_finish(insertDesc);
	pfree(aoEntry);

	if (segfile_array)
	{
		FreeAllSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
}
/*
 * Truncates each segment file of the AO relation to its EOF.
 * If we cannot get a lock on a segment file (because of e.g. a concurrent
 * insert) that segment file is skipped.
 */
void AppendOnlyTruncateToEOF(Relation aorel)
{
	const char* relname;
	int total_segfiles;
	FileSegInfo** segfile_array;
	int i, segno;
	LockAcquireResult acquireResult;
	FileSegInfo* fsinfo;

	Assert (RelationIsAoRows(aorel));

	relname = RelationGetRelationName(aorel);
	AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);

	/* Fix: debug message was copy-pasted from AppendOnlyCompact and
	 * wrongly reported this pass as a compaction. */
	elogif (Debug_appendonly_print_compaction, LOG,
			"Truncate AO relation %s to EOF", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllFileSegInfo(aorel, aoEntry, SnapshotNow, &total_segfiles);

	for(i = 0 ; i < total_segfiles ; i++)
	{
		segno = segfile_array[i]->segno;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
				&aorel->rd_node,
				segfile_array[i]->segno,
				AccessExclusiveLock,
				/* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			/* Lock is held by a concurrent writer: skip this segfile. */
			elog(DEBUG5, "truncate skips AO segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetFileSegInfo(aorel, aoEntry, SnapshotNow, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting
		 * (possibly index corruption?)
		 */
		if (fsinfo == NULL)
			elog(ERROR,
				 "file seginfo for AO relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		AppendOnlySegmentFileTruncateToEOF(aorel, fsinfo);
		pfree(fsinfo);
	}

	pfree(aoEntry);

	if (segfile_array)
	{
		FreeAllSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
}
/*
 * Drops segment files of an append-only relation that are in the
 * AOSEG_STATE_AWAITING_DROP state.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.
 */
void AppendOnlyDrop(Relation aorel, List* compaction_segno)
{
	const char* relname;
	int total_segfiles;
	FileSegInfo** segfile_array;
	int i, segno;
	FileSegInfo* fsinfo;

	Assert (Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert (RelationIsAoRows(aorel));

	relname = RelationGetRelationName(aorel);
	AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);

	elogif (Debug_appendonly_print_compaction, LOG,
			"Drop AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllFileSegInfo(aorel, aoEntry, SnapshotNow, &total_segfiles);

	for(i = 0 ; i < total_segfiles ; i++)
	{
		segno = segfile_array[i]->segno;
		/* Skip segments not selected for this pass. */
		if (list_find_int(compaction_segno, segno) < 0)
		{
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until commit / abort.
		 */
		LockRelationAppendOnlySegmentFile(
				&aorel->rd_node,
				segfile_array[i]->segno,
				AccessExclusiveLock,
				false);

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetFileSegInfo(aorel, aoEntry, SnapshotNow, segno);

		/*
		 * Fix: fsinfo->state was dereferenced without a NULL check.
		 * AppendOnlyCompact and AppendOnlyTruncateToEOF both guard the
		 * same re-fetch; do the same here.
		 */
		if (fsinfo == NULL)
			elog(ERROR,
				 "file seginfo for AO relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		if (fsinfo->state == AOSEG_STATE_AWAITING_DROP)
		{
			Assert(HasLockForSegmentFileDrop(aorel));
			Assert(!HasSerializableBackends(false));
			AppendOnlyCompaction_DropSegmentFile(aorel, segno);
			ClearFileSegInfo(aorel, aoEntry, segno,
					AOSEG_STATE_DEFAULT);
		}
		pfree(fsinfo);
	}

	pfree(aoEntry);

	if (segfile_array)
	{
		FreeAllSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
}