/*
 * AppendOnlySegmentFileTruncateToEOF()
 *
 * Truncates the physical segment file described by fsinfo to its logical
 * EOF (fsinfo->eof).
 *
 * aorel  - the row-oriented append-only relation that owns the segment file.
 * fsinfo - segment file info; supplies the segment number and the EOF.
 *
 * Assumes that the segment file lock is already held.
 *
 * When OpenAOSegmentFile() returns false nothing is truncated; only a debug
 * message is written (guarded by Debug_appendonly_print_compaction).
 */
static void
AppendOnlySegmentFileTruncateToEOF(Relation aorel,
		FileSegInfo *fsinfo)
{
	const char *relname;
	MirroredAppendOnlyOpen mirroredOpened;
	int32		fileSegNo;
	char		filenamepath[MAXPGPATH];
	int			segno;
	int64		segeof;

	Assert(fsinfo);
	Assert(RelationIsAoRows(aorel));

	segno = fsinfo->segno;
	relname = RelationGetRelationName(aorel);
	segeof = (int64) fsinfo->eof;

	/* Build the path of the segment file; -1 is passed as the column number */
	MakeAOSegmentFileName(aorel, segno, -1, &fileSegNo, filenamepath);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Opening AO relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
		   get_namespace_name(RelationGetNamespace(aorel)),
		   relname,
		   aorel->rd_id,
		   aorel->rd_node.relNode,
		   segno,
		   segeof);

	/* Open the segment file and cut off everything beyond its logical EOF */
	if (OpenAOSegmentFile(aorel, filenamepath, fileSegNo, segeof, &mirroredOpened))
	{
		/* ERROR is handed to the truncate routine as its error level */
		TruncateAOSegmentFile(&mirroredOpened, aorel, segeof, ERROR);
		CloseAOSegmentFile(&mirroredOpened);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Successfully truncated AO ROW relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   segno,
			   segeof);
	}
	else
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "No gp_relation_node entry for AO ROW relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   segno,
			   segeof);
	}
}
/**
 * Looks up the visibility map entry tuple identified by
 * (segmentFileNum, firstRowNum) through the visimap index.
 *
 * Note: firstRowNum has to be a valid first row number of an entry. In
 * particular it is not the tuple id of the append-only tuple that is being
 * checked, updated, or deleted.
 *
 * On a hit the tuple is loaded into visiMapEntry (including its tupleTid)
 * and true is returned. Otherwise false is returned.
 *
 * Assumes that the store data structure has been initialized, but not
 * finished.
 */
bool
AppendOnlyVisimapStore_Find(
		AppendOnlyVisimapStore* visiMapStore,
		int32 segmentFileNum,
		int64 firstRowNum,
		AppendOnlyVisimapEntry* visiMapEntry)
{
	ScanKey		keys;
	IndexScanDesc scan;

	Assert(visiMapStore);
	Assert(visiMapEntry);
	Assert(RelationIsValid(visiMapStore->visimapRelation));
	Assert(RelationIsValid(visiMapStore->visimapIndex));

	elogif (Debug_appendonly_print_visimap, LOG,
			"Append-only visi map store: Load entry: "
			"(segFileNum, firstRowNum) = (%u, " INT64_FORMAT ")",
			segmentFileNum, firstRowNum);

	/* Reuse the store's scan keys; only the arguments change per lookup */
	keys = visiMapStore->scanKeys;
	keys[0].sk_argument = Int32GetDatum(segmentFileNum);
	keys[1].sk_argument = Int64GetDatum(firstRowNum);

	scan = AppendOnlyVisimapStore_BeginScan(visiMapStore,
											APPENDONLY_VISIMAP_INDEX_SCAN_KEY_NUM,
											keys);

	if (AppendOnlyVisimapStore_GetNext(visiMapStore,
									   scan,
									   BackwardScanDirection,
									   visiMapEntry,
									   &visiMapEntry->tupleTid))
	{
		/* Entry found and loaded into visiMapEntry */
		AppendOnlyVisimapStore_EndScan(visiMapStore, scan);
		return true;
	}

	/* The lookup did not produce a row */
	elogif(Debug_appendonly_print_visimap, LOG,
			"Append-only visi map store: Visimap entry does not exist: "
			"(segFileNum, firstRowNum) = (%u, " INT64_FORMAT ")",
			segmentFileNum, firstRowNum);

	AppendOnlyVisimapStore_EndScan(visiMapStore, scan);
	return false;
}
/*
 * Writes firstRowNum into a block header.
 *
 * The value is stored directly behind the fixed part of the header
 * (regular or long size, depending on the header kind) and, when checksums
 * are in use, behind the two checksum words as well.
 *
 * headerPtr      - start of the block header to modify.
 * usingChecksums - true if the header carries header and block checksums.
 * firstRowNum    - value to store.
 */
static void
AppendOnlyStorageFormat_AddFirstRowNum(
	uint8			*headerPtr,
	bool			usingChecksums,
	int64			firstRowNum)
{
	AOSmallContentHeader *smallHeader = (AOSmallContentHeader *) headerPtr;
	int32		rowNumOffset;

	/* The fixed header part is either the long or the regular size */
	if (AoHeader_IsLong(AOSmallContentHeaderGet_headerKind(smallHeader)))
		rowNumOffset = AoHeader_LongSize;
	else
		rowNumOffset = AoHeader_RegularSize;

	/* Skip over the header checksum and the block checksum */
	if (usingChecksums)
		rowNumOffset += 2 * sizeof(pg_crc32);

	*((int64 *) &headerPtr[rowNumOffset]) = firstRowNum;

	elogif(Debug_appendonly_print_storage_headers, LOG,
		   "Append-Only storage first row number header result: block_bytes_0_3 0x%X, block_bytes_4_7 0x%X, "
		   "firstRowNum " INT64_FORMAT,
		   smallHeader->smallcontent_bytes_0_3,
		   smallHeader->smallcontent_bytes_4_7,
		   firstRowNum);
}
/*
 * Open a segment file for reading, without doing any error processing.
 *
 * The file is opened read-only in binary mode through PathNameOpenFile()
 * and whatever that call returns is handed back unchanged; checking the
 * result is left to the caller.
 *
 * storageRead  - active read state; its relation name and segment file name
 *                are only used for the debug message.
 * filePathName - name of the segment file to open.
 */
static File
AppendOnlyStorageRead_DoOpenFile(AppendOnlyStorageRead *storageRead,
								 char *filePathName)
{
	int			openFlags = O_RDONLY | PG_BINARY;
	int			openMode = 0400;	/* S_IRUSR: owner may read */

	Assert(storageRead != NULL);
	Assert(storageRead->isActive);
	Assert(filePathName != NULL);

	elogif(Debug_appendonly_print_read_block, LOG,
		   "Append-Only storage read: opening table '%s', segment file '%s', fileFlags 0x%x, fileMode 0x%x",
		   storageRead->relationName,
		   storageRead->segmentFileName,
		   openFlags,
		   openMode);

	return PathNameOpenFile(filePathName, openFlags, openMode);
}
Beispiel #5
0
/*
 * Moves one tuple of a column-oriented append-only relation: the values held
 * in slot are inserted through insertDesc, and index entries pointing at the
 * new tuple id are created when the result relation has indexes.
 *
 * slot           - slot holding the tuple to move; its ctid is the old
 *                  append-only tuple id.
 * insertDesc     - insert descriptor receiving the tuple.
 * resultRelInfo  - consulted for the number of indexes.
 * estate         - executor state used for the index inserts.
 */
static void
AOCSMoveTuple(TupleTableSlot *slot,
			  AOCSInsertDesc insertDesc,
			  ResultRelInfo *resultRelInfo,
			  EState *estate)
{
	AOTupleId  *sourceTid;
	AOTupleId	targetTid;

	Assert(resultRelInfo);
	Assert(slot);
	Assert(estate);

	sourceTid = (AOTupleId *) slot_get_ctid(slot);

	/* Make sure every attribute of the tuple is available in the slot */
	slot_getallattrs(slot);

	/* The return value of the insert is deliberately ignored */
	(void) aocs_insert_values(insertDesc,
							  slot_get_values(slot),
							  slot_get_isnull(slot),
							  &targetTid);

	/* Index entries are only needed when the relation has indexes */
	if (resultRelInfo->ri_NumIndices > 0)
	{
		ExecInsertIndexTuples(slot, (ItemPointer) &targetTid, estate);
		ResetPerTupleExprContext(estate);
	}

	elogif(Debug_appendonly_print_compaction, DEBUG5,
		   "Compaction: Moved tuple (%d," INT64_FORMAT ") -> (%d," INT64_FORMAT ")",
		   AOTupleIdGet_segmentFileNum(sourceTid), AOTupleIdGet_rowNum(sourceTid),
		   AOTupleIdGet_segmentFileNum(&targetTid), AOTupleIdGet_rowNum(&targetTid));
}
Beispiel #6
0
/*
 * Fills in the relation statistics for an append-only relation.
 *
 *	This information is used to update the reltuples and relpages information
 *	in pg_class. reltuples is the same as "pg_aoseg_<oid>:tupcount"
 *	column and we simulate relpages by subdividing the eof value
 *	("pg_aoseg_<oid>:eof") over the defined page size.
 *
 *	aorel		- row- or column-oriented append-only relation.
 *	snapshot	- snapshot used to read the segment file totals and the
 *				  visibility map.
 *	rel_pages	- out: page count guessed from the total byte count.
 *	rel_tuples	- out: total tuple count minus the hidden tuple count taken
 *				  from the visibility map.
 *	relhasindex - out: copy of the relation's relhasindex flag.
 *
 *	The result is also reported through ereport() at the level "elevel",
 *	which is defined outside of this function.
 */
void
vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot,
							 BlockNumber *rel_pages, double *rel_tuples,
							 bool *relhasindex)
{
	FileSegTotals *fstotal;
	BlockNumber nblocks;
	char	   *relname;
	double		num_tuples;
	double		totalbytes;
	int64		hidden_tupcount;
	AppendOnlyVisimap visimap;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	/* get updated statistics from the pg_aoseg table */
	if (RelationIsAoRows(aorel))
	{
		fstotal = GetSegFilesTotals(aorel, snapshot);
	}
	else
	{
		Assert(RelationIsAoCols(aorel));
		fstotal = GetAOCSSSegFilesTotals(aorel, snapshot);
	}

	/* calculate the values we care about */
	totalbytes = (double) fstotal->totalbytes;
	num_tuples = (double) fstotal->totaltuples;
	nblocks = (uint32) RelationGuessNumberOfBlocks(totalbytes);

	/* tuples hidden by the visibility map do not count as live tuples */
	AppendOnlyVisimap_Init(&visimap,
						   aorel->rd_appendonly->visimaprelid,
						   aorel->rd_appendonly->visimapidxid,
						   AccessShareLock,
						   snapshot);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	num_tuples -= hidden_tupcount;
	Assert(num_tuples > -1.0);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	/* nblocks is an unsigned BlockNumber, hence %u (as in the ereport below) */
	elogif (Debug_appendonly_print_compaction, LOG,
			"Gather statistics after vacuum for append-only relation %s: "
			"page count %u, tuple count %f",
			relname,
			nblocks, num_tuples);

	*rel_pages = nblocks;
	*rel_tuples = num_tuples;
	*relhasindex = aorel->rd_rel->relhasindex;

	ereport(elevel,
			(errmsg("\"%s\": found %.0f rows in %u pages.",
					relname, num_tuples, nblocks)));
	pfree(fstotal);
}
/*
 * Drops a segment file of an append-only relation.
 *
 * The gp_relation_node entry for (relfilenode, segno) is looked up first.
 * If there is none, nothing is done. Otherwise the drop of the file is
 * scheduled and the gp_relation_node tuple is deleted.
 */
static void
AppendOnlyCompaction_DropSegmentFile(Relation aorel,
		int segno)
{
	ItemPointerData nodeTid;
	int64		nodeSerialNum;
	bool		entryFound;

	entryFound = ReadGpRelationNode(aorel->rd_node.relNode,
									segno,
									&nodeTid,
									&nodeSerialNum);
	if (!entryFound)
		return;					/* There is nothing to drop */

	elogif(Debug_appendonly_print_compaction, LOG,
		"Drop segment file: segno %d", segno);

	MirroredFileSysObj_ScheduleDropAppendOnlyFile(&aorel->rd_node,
												  segno,
												  RelationGetRelationName(aorel),
												  &nodeTid,
												  nodeSerialNum);

	DeleteGpRelationNodeTuple(aorel, segno);
}
/*
 * Discards a tuple: when the tuple has external attributes, toast_delete()
 * is called for it; nothing else is modified here.
 *
 * rel     - relation passed on to toast_delete().
 * tuple   - the tuple that is thrown away.
 * slot    - slot holding the tuple; its ctid is the tuple's append-only
 *           tuple id (used for the debug message).
 * mt_bind - memtuple binding describing the tuple layout.
 */
void
AppendOnlyThrowAwayTuple(
					Relation rel,
					MemTuple tuple,
					TupleTableSlot	*slot,
					MemTupleBinding *mt_bind)
{
	AOTupleId  *discardedTid;

	Assert(slot);
	Assert(mt_bind);

	discardedTid = (AOTupleId *) slot_get_ctid(slot);

	/* Make sure every attribute of the tuple is available in the slot */
	slot_getallattrs(slot);

	/* Externally stored values have to be removed along with the tuple */
	if (MemTupleHasExternal(tuple, mt_bind))
		toast_delete(rel, (HeapTuple) tuple, mt_bind);

	elogif(Debug_appendonly_print_compaction, DEBUG5,
			"Compaction: Throw away tuple (%d," INT64_FORMAT ")",
			AOTupleIdGet_segmentFileNum(discardedTid), AOTupleIdGet_rowNum(discardedTid));
}
/*
 * Writes a visibility map entry to the visimap relation.
 *
 * An entry whose tupleTid is valid updates the existing row; an entry with
 * an invalid tupleTid is inserted as a new row. In both cases the indexes
 * are updated afterwards. On return the entry's tupleTid is invalid, i.e.
 * the entry/tuple is invalidated by this call.
 *
 * Assumes that a valid visimap entry is passed.
 * Assumes that the entry corresponds to the latest tuple
 * returned by AppendOnlyVisimapStore_Find.
 *
 * Should not be called twice in the same command.
 */
void
AppendOnlyVisimapStore_Store(
		AppendOnlyVisimapStore* visiMapStore,
		AppendOnlyVisimapEntry* visiMapEntry)
{
	Datum		attrValues[Natts_pg_aovisimap];
	bool		attrNulls[Natts_pg_aovisimap];
	Relation	visimapRel;
	HeapTuple	newTuple;
	MemoryContext callerContext;

	Assert(visiMapStore);
	Assert(visiMapEntry);

	elogif (Debug_appendonly_print_visimap, LOG,
			"Append-only visi map store: Store visimap entry: "
			"(segFileNum, firstRowNum) = (%u, " INT64_FORMAT ")",
			visiMapEntry->segmentFileNum, visiMapEntry->firstRowNum);

	/* All work below runs in the store's own memory context */
	callerContext = MemoryContextSwitchTo(visiMapStore->memoryContext);

	/* Serialize the entry into attribute values and build a heap tuple */
	AppendOnlyVisimapEntry_Write(visiMapEntry, attrValues, attrNulls);

	visimapRel = visiMapStore->visimapRelation;
	newTuple = heap_form_tuple(RelationGetDescr(visimapRel),
							   attrValues,
							   attrNulls);

	/* No valid tuple id yet means the entry has no row in the relation */
	if (!ItemPointerIsValid(&visiMapEntry->tupleTid))
		simple_heap_insert(visimapRel, newTuple);
	else
		simple_heap_update(visimapRel, &visiMapEntry->tupleTid, newTuple);

	CatalogUpdateIndexes(visimapRel, newTuple);

	heap_freetuple(newTuple);

	MemoryContextSwitchTo(callerContext);

	/* The stored entry must not be reused without a fresh lookup */
	ItemPointerSetInvalid(&visiMapEntry->tupleTid);
}
Beispiel #10
0
/**
 * Drops the per-column segment files of one segment of a column-oriented
 * append-only relation.
 *
 * For each column the pseudo segment number is derived from the column
 * number and segno, the matching gp_relation_node entry is read, the drop of
 * the file is scheduled and the gp_relation_node tuple is deleted.
 *
 * Note that the first column without a gp_relation_node entry ends the
 * whole function, so the columns after it are not examined.
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel,
							   int segno)
{
	ItemPointerData nodeTid;
	int64		nodeSerialNum;
	int			attno;

	Assert(RelationIsAoCols(aorel));

	for (attno = 0; attno < RelationGetNumberOfAttributes(aorel); attno++)
	{
		int			colSegNo = (attno * AOTupleId_MultiplierSegmentFileNum) + segno;
		bool		entryFound;

		entryFound = ReadGpRelationNode(aorel->rd_rel->reltablespace,
										aorel->rd_rel->relfilenode,
										colSegNo,
										&nodeTid,
										&nodeSerialNum);
		if (!entryFound)
			return;				/* There is nothing to drop */

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: "
			   "segno %d",
			   colSegNo);

		MirroredFileSysObj_ScheduleDropAppendOnlyFile(&aorel->rd_node,
													  colSegNo,
													  RelationGetRelationName(aorel),
													  &nodeTid,
													  nodeSerialNum);

		DeleteGpRelationNodeTuple(aorel, colSegNo);
	}
}
Beispiel #11
0
/*
 * Deletes all visibility map rows that belong to the given segment file.
 *
 * The visimap index is scanned forward with an equality key on the segment
 * number and each row returned by the scan is deleted from the visimap
 * relation.
 */
void
AppendOnlyVisimapStore_DeleteSegmentFile(
	AppendOnlyVisimapStore *visiMapStore,
	int segmentFileNum)
{
	ScanKeyData segnoKey;
	IndexScanDesc scan;
	ItemPointerData rowTid;

	Assert(visiMapStore);
	Assert(RelationIsValid(visiMapStore->visimapRelation));
	Assert(RelationIsValid(visiMapStore->visimapIndex));

	elogif(Debug_appendonly_print_visimap, LOG,
			"Append-only visi map store: Delete segment file: "
			"(segFileNum) = (%u)", segmentFileNum);

	/* Single key: segno = segmentFileNum */
	ScanKeyInit(&segnoKey,
				Anum_pg_aovisimap_segno,
				BTEqualStrategyNumber,
				F_INT4EQ,
				Int32GetDatum(segmentFileNum));

	scan = AppendOnlyVisimapStore_BeginScan(visiMapStore, 1, &segnoKey);

	for (;;)
	{
		/* No entry is requested (NULL); only the row's tid is needed */
		if (!AppendOnlyVisimapStore_GetNext(visiMapStore,
											scan,
											ForwardScanDirection,
											NULL,
											&rowTid))
			break;

		simple_heap_delete(visiMapStore->visimapRelation, &rowTid);
	}

	AppendOnlyVisimapStore_EndScan(visiMapStore, scan);
}
Beispiel #12
0
/*
 * Moves one tuple of a row-oriented append-only relation: the tuple is
 * inserted through insertDesc, and index entries pointing at the new tuple
 * id are created when the result relation has indexes.
 *
 * tuple          - the tuple to move.
 * slot           - slot holding the tuple; its ctid is the old append-only
 *                  tuple id.
 * mt_bind        - memtuple binding used to read the tuple's oid.
 * insertDesc     - insert descriptor receiving the tuple.
 * resultRelInfo  - consulted for the number of indexes.
 * estate         - executor state used for the index inserts.
 */
static void
AppendOnlyMoveTuple(MemTuple tuple,
					TupleTableSlot	*slot,
					MemTupleBinding *mt_bind,
					AppendOnlyInsertDesc insertDesc,
					ResultRelInfo *resultRelInfo,
					EState *estate)
{
	AOTupleId  *sourceTid;
	AOTupleId	targetTid;
	Oid			oidOfTuple;

	Assert(resultRelInfo);
	Assert(slot);
	Assert(mt_bind);
	Assert(estate);

	sourceTid = (AOTupleId *) slot_get_ctid(slot);

	/* Make sure every attribute of the tuple is available in the slot */
	slot_getallattrs(slot);

	/* Insert the tuple together with its oid; targetTid receives the new id */
	oidOfTuple = MemTupleGetOid(tuple, mt_bind);
	appendonly_insert(insertDesc, tuple, &oidOfTuple, &targetTid);

	/* Index entries are only needed when the relation has indexes */
	if (resultRelInfo->ri_NumIndices > 0)
	{
		ExecInsertIndexTuples(slot, (ItemPointer) &targetTid, estate, true);
		ResetPerTupleExprContext(estate);
	}

	elogif(Debug_appendonly_print_compaction, DEBUG5,
			"Compaction: Moved tuple (%d," INT64_FORMAT ") -> (%d," INT64_FORMAT ")",
			AOTupleIdGet_segmentFileNum(sourceTid), AOTupleIdGet_rowNum(sourceTid),
			AOTupleIdGet_segmentFileNum(&targetTid), AOTupleIdGet_rowNum(&targetTid));
}
Beispiel #13
0
/*
 * Drops a segment file.
 *
 * The per-column files are not removed; each one is truncated to 0 bytes so
 * that its space is reclaimed. Before GPDB 6 the file used to be removed,
 * but with WAL replication there is no longer a convenient function to
 * remove a single segment of a relation. An empty file is almost as good as
 * a non-existent one. If the relation is dropped later, the code in
 * mdunlink() will remove all segments, including any empty ones left behind
 * here.
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel,
							   int segno)
{
	int			attno;

	Assert(RelationIsAoCols(aorel));

	for (attno = 0; attno < RelationGetNumberOfAttributes(aorel); attno++)
	{
		char		segfilePath[MAXPGPATH];
		int			colSegNo;
		File		segfile;

		/* Work out the file name and pseudo segment number of this column */
		MakeAOSegmentFileName(aorel, segno, attno, &colSegNo, segfilePath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: "
			   "segno %d",
			   colSegNo);

		segfile = OpenAOSegmentFile(aorel, segfilePath, colSegNo, 0);
		if (segfile < 0)
		{
			/*
			 * The file we were about to drop/truncate didn't exist. That's
			 * normal, for example, if a column is added with ALTER TABLE ADD
			 * COLUMN.
			 */
			elog(DEBUG1, "could not truncate segfile %s, because it does not exist", segfilePath);
			continue;
		}

		/* Truncate to length 0 and release the file again */
		TruncateAOSegmentFile(segfile, aorel, colSegNo, 0);
		CloseAOSegmentFile(segfile);
	}
}
Beispiel #14
0
/*
 * Actually do a base backup for the specified tablespaces.
 *
 * This is split out mainly to avoid complaints about "variable might be
 * clobbered by longjmp" from stupider versions of gcc.
 *
 * opt       - backup options; the code below reads label, fastcheckpoint,
 *             exclude and includewal from it.
 * tblspcdir - not referenced in the visible part of this function.
 */
static void
perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
{
	XLogRecPtr	startptr;
	XLogRecPtr	endptr;
	char	   *labelfile;

	/* Start the backup; labelfile receives the backup label contents */
	startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &labelfile);
	Assert(!XLogRecPtrIsInvalid(startptr));

	/*
	 * NOTE(review): the condition is negated here (!debug_basebackup), while
	 * the "Sent file" message further down logs when debug_basebackup is set
	 * -- confirm which one is intended.
	 */
	elogif(!debug_basebackup, LOG,
		   "basebackup perform -- "
		   "Basebackup start xlog location = %X/%X",
		   startptr.xlogid, startptr.xrecoff);

	/*
	 * Set xlogCleanUpTo so that checkpoint process knows
	 * which old xlog files should not be cleaned
	 */
	WalSndSetXLogCleanUpTo(startptr);

	SIMPLE_FAULT_INJECTOR(BaseBackupPostCreateCheckpoint);

	/* Report the backup start location */
	SendXlogRecPtrResult(startptr);

	/* base_backup_cleanup is registered as the error cleanup callback */
	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
	{
		List	   *filespaces = NIL;
		ListCell   *lc;

		/* Collect information about all filespaces, including pg_system */
		filespaces = get_filespaces_to_send(opt);

		/* Send filespace header */
		SendBackupHeader(filespaces);

		/* Send off our filespaces one by one */
		foreach(lc, filespaces)
		{
			filespaceinfo *fi = (filespaceinfo *) lfirst(lc);
			StringInfoData buf;

			/* Send CopyOutResponse message */
			pq_beginmessage(&buf, 'H');
			pq_sendbyte(&buf, 0);		/* overall format */
			pq_sendint(&buf, 0, 2);		/* natts */
			pq_endmessage(&buf);

			/* In the main tar, include the backup_label first. */
			if (fi->primary_path == NULL)
				sendFileWithContent(BACKUP_LABEL_FILE, labelfile);

			/*
			 * An entry without a primary_path is sent from "." with a base
			 * path length of 1; any other entry is sent from its
			 * primary_path.
			 */
			sendDir(fi->primary_path == NULL ? "." : fi->primary_path,
					fi->primary_path == NULL ? 1 : strlen(fi->primary_path),
					opt->exclude, false);

			/* In the main tar, include pg_control last. */
			if (fi->primary_path == NULL)
			{
				struct stat statbuf;

				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
				{
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat control file \"%s\": %m",
									XLOG_CONTROL_FILE)));
				}

				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf);

				elogif(debug_basebackup, LOG,
					   "basebackup perform -- Sent file %s." , XLOG_CONTROL_FILE);
			}

			/*
			 * If we're including WAL, and this is the main data directory we
			 * don't terminate the tar stream here. Instead, we will append
			 * the xlog files below and terminate it then. This is safe since
			 * the main data directory is always sent *last*.
			 */
			if (opt->includewal && fi->xlogdir)
			{
				Assert(lnext(lc) == NULL);
			}
			else
				pq_putemptymessage('c');		/* CopyDone */
		}
	}
Beispiel #15
0
/*
 * lazy_vacuum_aorel -- perform LAZY VACUUM for one Append-only relation.
 *
 * What happens depends on the phase recorded in vacstmt:
 *
 *	prepare phase:	the indexes are vacuumed and the segment files are
 *					truncated to their EOF. Stats are updated only if the
 *					relation is flagged as empty.
 *	cleanup phase:	the relation statistics are gathered and stats are
 *					updated.
 *	any other phase: vacuum_appendonly_rel() does the work and stats are not
 *					updated.
 *
 * "Updating stats" means writing them to pg_class and reporting them to the
 * stats collector.
 */
static void
lazy_vacuum_aorel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats)
{
	LVRelStats *stats = (LVRelStats *) palloc0(sizeof(LVRelStats));
	bool		publishStats;

	if (vacuumStatement_IsInAppendOnlyPreparePhase(vacstmt))
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum prepare phase %s", RelationGetRelationName(onerel));

		vacuum_appendonly_indexes(onerel, vacstmt, updated_stats);

		if (RelationIsAoRows(onerel))
			AppendOnlyTruncateToEOF(onerel);
		else
			AOCSTruncateToEOF(onerel);

		/*
		 * MPP-23647.  For empty tables the compaction and cleanup phases
		 * are skipped, so the stats have to be updated right here in the
		 * prepare phase.  For a non-empty relation the usual phases follow
		 * and the stats are updated in the cleanup phase, once the correct
		 * values have been computed.
		 */
		publishStats = vacstmt->appendonly_relation_empty;
		if (publishStats)
		{
			/*
			 * All other LVRelStats fields keep the zeroes from palloc0;
			 * relhasindex has to be carried over so that it is set
			 * correctly in pg_class.
			 */
			stats->hasindex = onerel->rd_rel->relhasindex;
		}
	}
	else if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum cleanup phase %s", RelationGetRelationName(onerel));

		vacuum_appendonly_fill_stats(onerel, ActiveSnapshot,
									 &stats->rel_pages,
									 &stats->rel_tuples,
									 &stats->hasindex);

		/* reset the remaining LVRelStats values */
		stats->nonempty_pages = 0;
		stats->num_dead_tuples = 0;
		stats->max_dead_tuples = 0;
		stats->tuples_deleted = 0;
		stats->tot_free_pages = 0;
		stats->fs_is_heap = false;
		stats->num_free_pages = 0;
		stats->max_free_pages = 0;
		stats->pages_removed = 0;

		publishStats = true;
	}
	else
	{
		/* Neither prepare nor cleanup: do the actual vacuum work */
		vacuum_appendonly_rel(onerel, vacstmt);
		publishStats = false;
	}

	if (!publishStats)
		return;

	/* Update statistics in pg_class */
	vac_update_relstats_from_list(onerel,
								  stats->rel_pages,
								  stats->rel_tuples,
								  stats->hasindex,
								  FreezeLimit,
								  updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel),
						 onerel->rd_rel->relisshared,
						 true /* stands in for scanned_all */,
						 vacstmt->analyze, stats->rel_tuples);
}
Beispiel #16
0
/*
 *	vacuum_appendonly_rel() -- vacuum an append-only relation
 *
 *		Must not be called in the append-only cleanup phase.
 *
 *		After announcing the relation through ereport(), nothing more is
 *		done when running in the dispatcher role. Otherwise the work is
 *		selected by vacstmt->appendonly_compaction_insert_segno:
 *
 *		- no insert segno list: drop phase, the segment files listed in
 *		  appendonly_compaction_segno are dropped;
 *		- insert segno is APPENDONLY_COMPACTION_SEGNO_INVALID:
 *		  pseudo-compaction phase, only a debug message is written;
 *		- any other insert segno: compaction phase, the segment files
 *		  listed in appendonly_compaction_segno are compacted into the
 *		  insert segment file.
 *
 *		Row- and column-oriented relations use their own drop and compact
 *		routines.
 */
void
vacuum_appendonly_rel(Relation aorel, VacuumStmt *vacstmt)
{
	char	   *relname;
	PGRUsage	ru0;
	int			insert_segno;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));
	Assert(!vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt));

	pg_rusage_init(&ru0);
	relname = RelationGetRelationName(aorel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(aorel)),
					relname)));

	/* The dispatcher only announces the relation */
	if (Gp_role == GP_ROLE_DISPATCH)
		return;

	Assert(list_length(vacstmt->appendonly_compaction_insert_segno) <= 1);

	/* Without an insert segment file this is the drop phase */
	if (vacstmt->appendonly_compaction_insert_segno == NULL)
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			"Vacuum drop phase %s", RelationGetRelationName(aorel));

		if (RelationIsAoRows(aorel))
		{
			AppendOnlyDrop(aorel, vacstmt->appendonly_compaction_segno);
		}
		else
		{
			Assert(RelationIsAoCols(aorel));
			AOCSDrop(aorel, vacstmt->appendonly_compaction_segno);
		}
		return;
	}

	insert_segno = linitial_int(vacstmt->appendonly_compaction_insert_segno);

	/* An invalid insert segno marks the pseudo-compaction phase */
	if (insert_segno == APPENDONLY_COMPACTION_SEGNO_INVALID)
	{
		elogif(Debug_appendonly_print_compaction, LOG,
		"Vacuum pseudo-compaction phase %s", RelationGetRelationName(aorel));
		return;
	}

	/* Regular compaction into the insert segment file */
	elogif(Debug_appendonly_print_compaction, LOG,
		"Vacuum compaction phase %s", RelationGetRelationName(aorel));

	if (RelationIsAoRows(aorel))
	{
		AppendOnlyCompact(aorel,
						  vacstmt->appendonly_compaction_segno,
						  insert_segno, vacstmt->full);
	}
	else
	{
		Assert(RelationIsAoCols(aorel));
		AOCSCompact(aorel,
					vacstmt->appendonly_compaction_segno,
					insert_segno, vacstmt->full);
	}
}
Beispiel #17
0
/*
 * Truncates each segment file of the AOCS relation to its EOF.
 *
 * The segment file lock is requested without waiting. If it cannot be
 * obtained (e.g. because of a concurrent insert) the segment file is
 * skipped.
 */
void
AOCSTruncateToEOF(Relation aorel)
{
	const char *relname;
	int			segfileCount;
	AOCSFileSegInfo **allSegfiles;
	int			idx;
	Snapshot	metaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	allSegfiles = GetAllAOCSFileSegInfo(aorel, metaDataSnapshot, &segfileCount);

	for (idx = 0; idx < segfileCount; idx++)
	{
		int			segno = allSegfiles[idx]->segno;
		AOCSFileSegInfo *lockedInfo;
		LockAcquireResult lockResult;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file, without waiting for it.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		lockResult = LockRelationAppendOnlySegmentFile(&aorel->rd_node,
													   segno,
													   AccessExclusiveLock,
													   true /* dontWait */ );
		if (lockResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "truncate skips AO segfile %d, "
				 "relation %s", segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		lockedInfo = GetAOCSFileSegInfo(aorel, metaDataSnapshot, segno);

		/*
		 * A missing entry should not occur, since this segfile was just
		 * returned by the "all" lookup, but it is better to catch it for
		 * trouble shooting (possibly index corruption?)
		 */
		if (lockedInfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		AOCSSegmentFileTruncateToEOF(aorel, lockedInfo);
		pfree(lockedInfo);
	}

	if (allSegfiles)
	{
		FreeAllAOCSSegFileInfo(allSegfiles, segfileCount);
		pfree(allSegfiles);
	}
	UnregisterSnapshot(metaDataSnapshot);
}
/*
 * Get information on the next Append-Only Storage Block.
 *
 * Return true if another block was found.  Otherwise, we have reached the
 * end of the current segment file.
 */
bool
AppendOnlyStorageRead_ReadNextBlock(AppendOnlyStorageRead *storageRead)
{
	uint8	   *header;
	AOHeaderCheckError checkError;
	int32		blockLimitLen = 0;	/* Shutup compiler. */
	pg_crc32	storedChecksum;
	pg_crc32	computedChecksum;

	/*
	 * Reset current* variables.
	 */

	/* For efficiency, zero out.  Comment out lines that set fields to 0. */
	memset(&storageRead->current, 0, sizeof(AppendOnlyStorageReadCurrent));

/*	storageRead->current.headerOffsetInFile = 0; */
	storageRead->current.headerKind = AoHeaderKind_None;
/*	storageRead->current.actualHeaderLen = 0; */
/*	storageRead->current.contentLen = 0; */
/*	storageRead->current.overallBlockLen = 0; */
/*	storageRead->current.contentOffset = 0; */
/*	storageRead->current.executorBlockKind = 0; */
/*	storageRead->current.hasFirstRowNum = false; */
	storageRead->current.firstRowNum = INT64CONST(-1);
/*	storageRead->current.rowCount = 0; */
/*	storageRead->current.isLarge = false; */
/*	storageRead->current.isCompressed = false; */
/*	storageRead->current.compressedLen = 0; */

	elogif(Debug_appendonly_print_datumstream, LOG,
		   "before AppendOnlyStorageRead_PositionToNextBlock, storageRead->current.headerOffsetInFile is" INT64_FORMAT "storageRead->current.overallBlockLen is %d",
		   storageRead->current.headerOffsetInFile, storageRead->current.overallBlockLen);

	if (!AppendOnlyStorageRead_PositionToNextBlock(storageRead,
									&storageRead->current.headerOffsetInFile,
												   &header,
												   &blockLimitLen))
	{
		/* Done reading the file */
		return false;
	}

	elogif(Debug_appendonly_print_datumstream, LOG,
		   "after AppendOnlyStorageRead_PositionToNextBlock, storageRead->current.headerOffsetInFile is" INT64_FORMAT "storageRead->current.overallBlockLen is %d",
		   storageRead->current.headerOffsetInFile, storageRead->current.overallBlockLen);

	/*----------
	 * Proceed very carefully:
	 * [ 1. Verify header checksum ]
	 *	 2. Examine (basic) header.
	 *	 3. Examine specific header.
	 * [ 4. Verify the block checksum ]
	 *----------
	 */
	if (storageRead->storageAttributes.checksum &&
		gp_appendonly_verify_block_checksums)
	{
		if (!AppendOnlyStorageFormat_VerifyHeaderChecksum(header,
														  &storedChecksum,
														  &computedChecksum))
			ereport(ERROR,
					(errmsg("Header checksum does not match.  Expected 0x%X and found 0x%X ",
							storedChecksum,
							computedChecksum),
			   errdetail_appendonly_read_storage_content_header(storageRead),
					 errcontext_appendonly_read_storage_block(storageRead)));
	}

	/*
	 * Check the (basic) header information.
	 */
	checkError = AppendOnlyStorageFormat_GetHeaderInfo(header,
									 storageRead->storageAttributes.checksum,
											&storageRead->current.headerKind,
									  &storageRead->current.actualHeaderLen);
	if (checkError != AOHeaderCheckOk)
		ereport(ERROR,
				(errmsg("Bad append-only storage header.  Header check error %d, detail '%s'",
						(int) checkError,
						AppendOnlyStorageFormat_GetHeaderCheckErrorStr()),
			   errdetail_appendonly_read_storage_content_header(storageRead),
				 errcontext_appendonly_read_storage_block(storageRead)));

	/*
	 * Get more header since AppendOnlyStorageRead_PositionToNextBlock only
	 * gets minimum.
	 */
	if (storageRead->minimumHeaderLen < storageRead->current.actualHeaderLen)
	{
		int32		availableLen;

		header = BufferedReadGrowBuffer(&storageRead->bufferedRead,
										storageRead->current.actualHeaderLen,
										&availableLen);

		if (header == NULL ||
			availableLen != storageRead->current.actualHeaderLen)
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERNAL_ERROR),
				   errmsg("Expected %d bytes and found %d bytes in table %s "
						  "(segment file '%s', header offset in file = " INT64_FORMAT ", bufferCount " INT64_FORMAT ")",
						  storageRead->current.actualHeaderLen,
						  availableLen,
						  storageRead->relationName,
						  storageRead->segmentFileName,
						  storageRead->current.headerOffsetInFile,
						  storageRead->bufferCount)));
	}

	/*
	 * Based on the kind of header, we either have small or large content.
	 */
	switch (storageRead->current.headerKind)
	{
		case AoHeaderKind_SmallContent:

			/*
			 * Check the SmallContent header information.
			 */
			checkError = AppendOnlyStorageFormat_GetSmallContentHeaderInfo
				(header,
				 storageRead->current.actualHeaderLen,
				 storageRead->storageAttributes.checksum,
				 blockLimitLen,
				 &storageRead->current.overallBlockLen,
				 &storageRead->current.contentOffset,
				 &storageRead->current.uncompressedLen,
				 &storageRead->current.executorBlockKind,
				 &storageRead->current.hasFirstRowNum,
				 storageRead->formatVersion,
				 &storageRead->current.firstRowNum,
				 &storageRead->current.rowCount,
				 &storageRead->current.isCompressed,
				 &storageRead->current.compressedLen
				);
			if (checkError != AOHeaderCheckOk)
				ereport(ERROR,
						(errmsg("Bad append-only storage header of type small content. Header check error %d, detail '%s'",
								(int) checkError,
						   AppendOnlyStorageFormat_GetHeaderCheckErrorStr()),
				errdetail_appendonly_read_storage_content_header(storageRead),
					 errcontext_appendonly_read_storage_block(storageRead)));
			break;

		case AoHeaderKind_LargeContent:

			/*
			 * Check the LargeContent metadata header information.
			 */
			checkError = AppendOnlyStorageFormat_GetLargeContentHeaderInfo
				(header,
				 storageRead->current.actualHeaderLen,
				 storageRead->storageAttributes.checksum,
				 &storageRead->current.uncompressedLen,
				 &storageRead->current.executorBlockKind,
				 &storageRead->current.hasFirstRowNum,
				 &storageRead->current.firstRowNum,
				 &storageRead->current.rowCount);
			if (checkError != AOHeaderCheckOk)
				ereport(ERROR,
						(errmsg("Bad append-only storage header of type large content. Header check error %d, detail '%s'",
								(int) checkError,
						   AppendOnlyStorageFormat_GetHeaderCheckErrorStr()),
				errdetail_appendonly_read_storage_content_header(storageRead),
					 errcontext_appendonly_read_storage_block(storageRead)));
			storageRead->current.isLarge = true;
			break;

		case AoHeaderKind_NonBulkDenseContent:

			/*
			 * Check the NonBulkDense header information.
			 */
			checkError =
				AppendOnlyStorageFormat_GetNonBulkDenseContentHeaderInfo
				(header,
				 storageRead->current.actualHeaderLen,
				 storageRead->storageAttributes.checksum,
				 blockLimitLen,
				 &storageRead->current.overallBlockLen,
				 &storageRead->current.contentOffset,
				 &storageRead->current.uncompressedLen,
				 &storageRead->current.executorBlockKind,
				 &storageRead->current.hasFirstRowNum,
				 storageRead->formatVersion,
				 &storageRead->current.firstRowNum,
				 &storageRead->current.rowCount
				);
			if (checkError != AOHeaderCheckOk)
				ereport(ERROR,
						(errmsg("Bad append-only storage header of type non-bulk dense content. Header check error %d, detail '%s'",
								(int) checkError,
						   AppendOnlyStorageFormat_GetHeaderCheckErrorStr()),
				errdetail_appendonly_read_storage_content_header(storageRead),
					 errcontext_appendonly_read_storage_block(storageRead)));
			break;

		case AoHeaderKind_BulkDenseContent:

			/*
			 * Check the BulkDenseContent header information.
			 */
			checkError =
				AppendOnlyStorageFormat_GetBulkDenseContentHeaderInfo
				(header,
				 storageRead->current.actualHeaderLen,
				 storageRead->storageAttributes.checksum,
				 blockLimitLen,
				 &storageRead->current.overallBlockLen,
				 &storageRead->current.contentOffset,
				 &storageRead->current.uncompressedLen,
				 &storageRead->current.executorBlockKind,
				 &storageRead->current.hasFirstRowNum,
				 storageRead->formatVersion,
				 &storageRead->current.firstRowNum,
				 &storageRead->current.rowCount,
				 &storageRead->current.isCompressed,
				 &storageRead->current.compressedLen
				);
			if (checkError != AOHeaderCheckOk)
				ereport(ERROR,
						(errmsg("Bad append-only storage header of type bulk dense content. Header check error %d, detail '%s'",
								(int) checkError,
						   AppendOnlyStorageFormat_GetHeaderCheckErrorStr()),
				errdetail_appendonly_read_storage_content_header(storageRead),
					 errcontext_appendonly_read_storage_block(storageRead)));
			break;

		default:
			elog(ERROR, "Unexpected Append-Only header kind %d",
				 storageRead->current.headerKind);
			break;
	}

	if (Debug_appendonly_print_storage_headers)
	{
		AppendOnlyStorageRead_LogBlockHeader(storageRead, header);
	}

	if (storageRead->current.hasFirstRowNum)
	{
		/* UNDONE: Grow buffer and read the value into firstRowNum. */
	}

	if (storageRead->current.headerKind == AoHeaderKind_LargeContent)
	{
		/* UNDONE: Finish the read for the information only header. */
	}

	return true;
}
/*
 * AppendOnlyStorageRead_Init
 *
 * Set up an AppendOnlyStorageRead for a read "session".  The structure is
 * initialized once and may then be used to read Append-Only Storage Blocks
 * from one or more segment files; the file to read from is opened
 * separately with AppendOnlyStorageRead_OpenFile.
 *
 * Parameters:
 *	storageRead		  - structure to initialize; it is zeroed first, so any
 *						previous contents are lost.
 *	memoryContext	  - context that owns the relation-name copy and the
 *						BufferedRead memory.  NULL selects
 *						CurrentMemoryContext.
 *	maxBufferLen	  - maximum Append-Only Storage Block length including
 *						all storage headers.
 *	relationName	  - relation name used in logging and error messages;
 *						copied into the chosen memory context.
 *	title			  - phrase describing the purpose of this open.  Only
 *						the pointer is stored; the caller manages the
 *						storage.
 *	storageAttributes - Append-Only Storage Attributes from relation
 *						creation; copied by value.
 *
 * On return the structure is marked active, has no file open (file = -1)
 * and has an unset format version (formatVersion = -1).
 */
void
AppendOnlyStorageRead_Init(AppendOnlyStorageRead *storageRead,
						   MemoryContext memoryContext,
						   int32 maxBufferLen,
						   char *relationName,
						   char *title,
						   AppendOnlyStorageAttributes *storageAttributes)
{
	MemoryContext callerContext;
	int			nameBytes;
	int32		bufferedReadLen;
	uint8	   *bufferedReadMemory;

	Assert(storageRead != NULL);
	Assert(relationName != NULL);
	Assert(storageAttributes != NULL);

	/* UNDONE: Range check maxBufferLen */
	/* UNDONE: Range check fields in storageAttributes */

	/* Start from a clean slate; every field not set below stays zero. */
	MemSet(storageRead, 0, sizeof(AppendOnlyStorageRead));

	storageRead->maxBufferLen = maxBufferLen;
	storageRead->memoryContext =
		(memoryContext != NULL) ? memoryContext : CurrentMemoryContext;

	/* Everything allocated below must live in the session's context. */
	callerContext = MemoryContextSwitchTo(storageRead->memoryContext);

	memcpy(&storageRead->storageAttributes,
		   storageAttributes,
		   sizeof(AppendOnlyStorageAttributes));

	/* Keep a private, NUL-terminated copy of the relation name. */
	nameBytes = strlen(relationName) + 1;
	storageRead->relationName = (char *) palloc(nameBytes);
	memcpy(storageRead->relationName, relationName, nameBytes);

	storageRead->title = title;

	storageRead->minimumHeaderLen =
		AppendOnlyStorageFormat_RegularHeaderLenNeeded(
									storageRead->storageAttributes.checksum);

	/*
	 * Initialize BufferedRead.  The large read length is fixed at twice the
	 * maximum block length.
	 */
	storageRead->largeReadLen = 2 * storageRead->maxBufferLen;

	bufferedReadLen = BufferedReadMemoryLen(storageRead->maxBufferLen,
											storageRead->largeReadLen);

	Assert(CurrentMemoryContext == storageRead->memoryContext);
	bufferedReadMemory = (uint8 *) palloc(bufferedReadLen);

	BufferedReadInit(&storageRead->bufferedRead,
					 bufferedReadMemory,
					 bufferedReadLen,
					 storageRead->maxBufferLen,
					 storageRead->largeReadLen,
					 relationName);

	elogif(Debug_appendonly_print_scan || Debug_appendonly_print_read_block, LOG,
		   "Append-Only Storage Read initialize for table '%s' "
		   "(compression = %s, compression level %d, maximum buffer length %d, large read length %d)",
		   storageRead->relationName,
		   (storageRead->storageAttributes.compress ? "true" : "false"),
		   storageRead->storageAttributes.compressLevel,
		   storageRead->maxBufferLen,
		   storageRead->largeReadLen);

	/* No segment file is open yet and no format version is known. */
	storageRead->file = -1;
	storageRead->formatVersion = -1;

	MemoryContextSwitchTo(callerContext);

	storageRead->isActive = true;
}
Beispiel #20
0
/*
 * AOCSSegmentFileTruncateToEOF()
 *
 * Assumes that the segment file lock is already held.
 *
 * Walks every vpinfo entry of the given segment file and truncates the
 * corresponding physical file to the logical EOF recorded in that entry.
 * When OpenAOSegmentFile() returns a negative descriptor the entry is
 * skipped; only a debug message is logged in that case.
 */
static void
AOCSSegmentFileTruncateToEOF(Relation aorel,
							 AOCSFileSegInfo *fsinfo)
{
	const char *relname;
	int			segno;
	int			colno;

	Assert(fsinfo);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);
	segno = fsinfo->segno;

	for (colno = 0; colno < fsinfo->vpinfo.nEntry; colno++)
	{
		AOCSVPInfoEntry *vpentry = getAOCSVPEntry(fsinfo, colno);
		int64		segeof = vpentry->eof;
		char		filenamepath[MAXPGPATH];
		int32		fileSegNo;
		File		fd;

		/* Work out the physical file name for this (segment, column) pair */
		MakeAOSegmentFileName(aorel, segno, colno, &fileSegNo, filenamepath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Opening AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   colno,
			   segno,
			   fileSegNo,
			   segeof);

		fd = OpenAOSegmentFile(aorel, filenamepath, fileSegNo, segeof);
		if (fd < 0)
		{
			/* Nothing to truncate for this column; note it and move on. */
			elogif(Debug_appendonly_print_compaction, LOG,
				   "No gp_relation_node entry for AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   aorel->rd_id,
				   aorel->rd_node.relNode,
				   colno,
				   segno,
				   fileSegNo,
				   segeof);
			continue;
		}

		/* Cut the file back to its logical EOF and release the descriptor */
		TruncateAOSegmentFile(fd, aorel, fileSegNo, segeof);
		CloseAOSegmentFile(fd);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Successfully truncated AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   colno,
			   segno,
			   fileSegNo,
			   segeof);
	}
}
Beispiel #21
0
/*
 * AOCSSegmentFileFullCompaction()
 *
 * Assumes that the segment file lock is already held.
 * Assumes that the segment file should be compacted.
 *
 * Scans every tuple of the segment file described by fsinfo.  Tuples that
 * the visibility map reports as visible are moved through insertDesc
 * (AOCSMoveTuple); all others are thrown away.  Afterwards the segment
 * file is put into the AOSEG_STATE_AWAITING_DROP state, its visimap
 * entries are deleted and, when the relation has a block directory, its
 * block directory entries are deleted as well.
 *
 * aorel	  - the column-oriented append-only relation being compacted
 * insertDesc - insert descriptor receiving the surviving tuples
 * fsinfo	  - segment file to compact
 * snapshot	  - snapshot used for the visimap, the scan and the block
 *				directory cleanup
 *
 * Always returns true.
 */
static bool
AOCSSegmentFileFullCompaction(Relation aorel,
							  AOCSInsertDesc insertDesc,
							  AOCSFileSegInfo *fsinfo,
							  Snapshot snapshot)
{
	const char *relname;
	AppendOnlyVisimap visiMap;
	AOCSScanDesc scanDesc;
	TupleDesc	tupDesc;
	TupleTableSlot *slot;
	int			compact_segno;
	int64		movedTupleCount = 0;
	ResultRelInfo *resultRelInfo;
	MemTupleBinding *mt_bind;
	EState	   *estate;
	bool	   *proj;
	int			i;
	AOTupleId  *aoTupleId;
	int64		tupleCount = 0;
	int64		tuplePerPage = INT_MAX;

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));
	Assert(insertDesc);
	Assert(fsinfo);

	compact_segno = fsinfo->segno;
	if (fsinfo->varblockcount > 0)
	{
		tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount;

		/*
		 * The quotient is zero whenever there are fewer tuples than var
		 * blocks.  It is used as a modulus below, so clamp it to at least
		 * one to avoid a division by zero.
		 */
		if (tuplePerPage < 1)
			tuplePerPage = 1;
	}
	relname = RelationGetRelationName(aorel);

	AppendOnlyVisimap_Init(&visiMap,
						   aorel->rd_appendonly->visimaprelid,
						   aorel->rd_appendonly->visimapidxid,
						   ShareLock,
						   snapshot);

	elogif(Debug_appendonly_print_compaction,
		   LOG, "Compact AO segfile %d, relation %s",
		   compact_segno, relname);

	/* Project every column: each surviving tuple is moved in full. */
	proj = palloc0(sizeof(bool) * RelationGetNumberOfAttributes(aorel));
	for (i = 0; i < RelationGetNumberOfAttributes(aorel); ++i)
	{
		proj[i] = true;
	}
	scanDesc = aocs_beginrangescan(aorel,
								   snapshot, snapshot,
								   &compact_segno, 1, NULL, proj);

	tupDesc = RelationGetDescr(aorel);
	slot = MakeSingleTupleTableSlot(tupDesc);
	mt_bind = create_memtuple_binding(tupDesc);

	/*
	 * We need a ResultRelInfo and an EState so we can use the regular
	 * executor's index-entry-making machinery.
	 */
	estate = CreateExecutorState();
	resultRelInfo = makeNode(ResultRelInfo);
	resultRelInfo->ri_RangeTableIndex = 1;	/* dummy */
	resultRelInfo->ri_RelationDesc = aorel;
	resultRelInfo->ri_TrigDesc = NULL;	/* we don't fire triggers */
	ExecOpenIndices(resultRelInfo);
	estate->es_result_relations = resultRelInfo;
	estate->es_num_result_relations = 1;
	estate->es_result_relation_info = resultRelInfo;

	while (aocs_getnext(scanDesc, ForwardScanDirection, slot))
	{
		CHECK_FOR_INTERRUPTS();

		aoTupleId = (AOTupleId *) slot_get_ctid(slot);
		if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId))
		{
			AOCSMoveTuple(slot,
						  insertDesc,
						  resultRelInfo,
						  estate);
			movedTupleCount++;
		}
		else
		{
			/* Tuple is invisible and needs to be dropped */
			AppendOnlyThrowAwayTuple(aorel,
									 slot,
									 mt_bind);
		}

		/*
		 * Check for vacuum delay point after approximately a var block
		 */
		tupleCount++;
		if (VacuumCostActive && tupleCount % tuplePerPage == 0)
		{
			vacuum_delay_point();
		}
	}

	SetAOCSFileSegInfoState(aorel, compact_segno,
							AOSEG_STATE_AWAITING_DROP);

	AppendOnlyVisimap_DeleteSegmentFile(&visiMap,
										compact_segno);

	/* Delete all mini pages of the segment files if block directory exists */
	if (OidIsValid(aorel->rd_appendonly->blkdirrelid))
	{
		AppendOnlyBlockDirectory_DeleteSegmentFile(aorel,
												   snapshot,
												   compact_segno,
												   0);
	}

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Finished compaction: "
		   "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT,
		   compact_segno, relname, movedTupleCount);

	AppendOnlyVisimap_Finish(&visiMap, NoLock);

	ExecCloseIndices(resultRelInfo);
	FreeExecutorState(estate);

	ExecDropSingleTupleTableSlot(slot);
	destroy_memtuple_binding(mt_bind);

	aocs_endscan(scanDesc);
	pfree(proj);

	return true;
}
/*
 * AppendOnlyStorageFormat_MakeLargeContentHeader
 *
 * Fill in a LargeContent block header in the memory starting at headerPtr.
 *
 * headerPtr		  - destination for the header; must not be NULL
 * usingChecksums	  - when true, header checksums are added after the
 *						header fields have been set
 * hasFirstRowNum	  - when true, firstRowNum is stored with the header
 * version			  - passed through to the checksum routine
 * firstRowNum		  - the optional first row number
 * executorKind		  - executor block kind recorded in the header
 * largeRowCount	  - row count recorded in the header
 * largeContentLength - content length recorded in the header
 */
void
AppendOnlyStorageFormat_MakeLargeContentHeader(
	uint8			*headerPtr,
	bool			usingChecksums,
	bool			hasFirstRowNum,
	int				version,
	int64			firstRowNum,
	int				executorKind,
	int				largeRowCount,
	int32			largeContentLength)
{
	AOLargeContentHeader *hdr;

	Assert(headerPtr != NULL);

	elogif(Debug_appendonly_print_storage_headers, LOG,
			 "Append-Only Storage make LargeContent header parameters: usingChecksums = %s, executorKind = %d, "
			 "largeRowCount = %d, largeContentLength %d",
			 (usingChecksums ? "true" : "false"),
			 executorKind,
			 largeRowCount,
			 largeContentLength);

	hdr = (AOLargeContentHeader *) headerPtr;

	/* Zero out whole header, then set each field. */
	AOLargeContentHeaderInit_Init(hdr);

	AOLargeContentHeaderInit_headerKind(hdr, AoHeaderKind_LargeContent);
	AOLargeContentHeaderInit_executorBlockKind(hdr, executorKind);
	AOLargeContentHeaderInit_largeRowCount(hdr, largeRowCount);
	AOLargeContentHeaderInit_largeContentLength(hdr, largeContentLength);
	AOLargeContentHeaderInit_hasFirstRowNum(hdr, hasFirstRowNum);

	/*
	 * Add the optional firstRowNum.
	 *
	 * NOTE: This is not part of the 8-byte (64-bit) header because it is so
	 * big.  And, it is not covered by the header checksum because in order
	 * to determine if we should checksum more data we would need to examine
	 * the header data not verified by checksum yet...
	 *
	 * So, the firstRowNum is extra data between the header (and checksums)
	 * and the content.  We must add it before computing the checksum.
	 */
	if (hasFirstRowNum)
		AppendOnlyStorageFormat_AddFirstRowNum(headerPtr,
											   usingChecksums,
											   firstRowNum);

	if (!usingChecksums)
	{
		/* No checksums to add: just report the raw header words. */
		elogif(Debug_appendonly_print_storage_headers, LOG,
				 "Append-Only storage make block header result: block_bytes_0_3 0x%X, block_bytes_4_7 0x%X",
				 hdr->largecontent_bytes_0_3,
				 hdr->largecontent_bytes_4_7);
		return;
	}

	/*
	 * UNDONE: Set 2nd checksum to 0 when there is no content???
	 *
	 * NOTE(review): hasFirstRowNum is passed as false here even when the
	 * caller supplied a first row number -- confirm this is intentional.
	 */
	AppendOnlyStorageFormat_AddBlockHeaderChecksums(headerPtr,
													/* isCompressed */ false,
													/* hasFirstRowNum */ false,
													version,
													/* dataLength */ 0,
													/* compressedLength */ 0);
}
Beispiel #23
0
/*
 * Drops segment files of an append-only AOCS relation.
 *
 * For every segment file of the relation whose number is listed in
 * compaction_segno, try to take the segment-file write lock without
 * waiting.  Segment files whose lock is not available are skipped.  A
 * locked segment file that is in the AOSEG_STATE_AWAITING_DROP state is
 * physically dropped and its segment-info entry is cleared back to
 * AOSEG_STATE_DEFAULT.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.
 */
void
AOCSDrop(Relation aorel,
		 List *compaction_segno)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	int			idx;
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Drop AOCS relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel,
										  appendOnlyMetaDataSnapshot,
										  &total_segfiles);

	for (idx = 0; idx < total_segfiles; idx++)
	{
		int			segno = segfile_array[idx]->segno;
		LockAcquireResult lockResult;
		AOCSFileSegInfo *current;

		/* Only segment files the caller asked about are candidates. */
		if (!list_member_int(compaction_segno, segno))
			continue;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		lockResult = LockRelationAppendOnlySegmentFile(&aorel->rd_node,
													   segno,
													   AccessExclusiveLock,
													   /* dontWait */ true);
		if (lockResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "drop skips AOCS segfile %d, "
				 "relation %s", segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		current = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		if (current->state == AOSEG_STATE_AWAITING_DROP)
		{
			Assert(HasLockForSegmentFileDrop(aorel));
			AOCSCompaction_DropSegmentFile(aorel, segno);
			ClearAOCSFileSegInfo(aorel, segno, AOSEG_STATE_DEFAULT);
		}
		pfree(current);
	}

	if (segfile_array != NULL)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}

	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}
Beispiel #24
0
/*
 * AlterTableCreateAoVisimapTable
 *
 * Create the visibility-map auxiliary table ("pg_aovisimap") for the
 * append-only relation identified by relOid, together with a unique
 * index on (segno, first_row_no).
 *
 * relOid		 - OID of the relation to create the visimap for
 * is_part_child - when true the relation is opened with NoLock
 *				   (presumably the caller already holds a suitable lock --
 *				   TODO confirm); otherwise an AccessExclusiveLock is taken
 *
 * Does nothing if the relation is neither row- nor column-oriented
 * append-only.  Any lock taken here is kept until end of transaction.
 */
void
AlterTableCreateAoVisimapTable(Oid relOid, bool is_part_child)
{
	Relation	rel;
	IndexInfo  *indexInfo;
	TupleDesc	tupdesc;
	Oid			classObjectId[2];
	int16		coloptions[2];

	/* Oid is unsigned, so it must be printed with %u rather than %d. */
	elogif(Debug_appendonly_print_visimap, LOG,
		   "Create visimap for relation %u",
		   relOid);

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	/* Only append-only relations get a visimap. */
	if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel))
	{
		heap_close(rel, NoLock);
		return;
	}

	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(Natts_pg_aovisimap, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "segno",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "first_row_no",
					   INT8OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "visimap",
					   BYTEAOID,
					   -1, 0);

	/*
	 * We don't want any toast columns here.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';

	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 2;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_Concurrent = false;

	/* Operator classes for the two index key columns (int4, int8). */
	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT8_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;

	(void) CreateAOAuxiliaryTable(rel,
								  "pg_aovisimap",
								  RELKIND_AOVISIMAP,
								  tupdesc, indexInfo, classObjectId, coloptions);

	/* Keep the lock (if any) until end of transaction. */
	heap_close(rel, NoLock);
}
Beispiel #25
0
/*
 * Initialize AppendOnlyStorageRead.
 *
 * The AppendOnlyStorageRead data structure is initialized once for a read
 * "session" and can be used to read Append-Only Storage Blocks from 1 or
 * more segment files.
 *
 * The current file to read to is opened with the
 * AppendOnlyStorageRead_OpenFile routine.
 *
 * storageRead		 - The data structure to initialize.  It is zeroed
 *					   before any field is set.
 * memoryContext	 - The memory context to use for buffers and other
 *					   memory needs.  When NULL, the current memory context
 *					   is used.
 * maxBufferLen		 - The maximum Append-Only Storage Block length
 *					   including all storage headers.
 * relationName		 - Name of the relation to use in system logging and
 *					   error messages.  A private copy is made.
 * title			 - A phrase that better describes the purpose of this
 *					   open.  The caller manages the storage for this.
 * storageAttributes - The Append-Only Storage Attributes from relation
 *					   creation.  Copied by value.
 */
void
AppendOnlyStorageRead_Init(AppendOnlyStorageRead *storageRead,
						   MemoryContext memoryContext,
						   int32 maxBufferLen,
						   char *relationName,
						   char *title,
						   AppendOnlyStorageAttributes *storageAttributes)
{
	MemoryContext savedContext;
	int			nameLenWithNul;
	int32		readMemoryLen;
	uint8	   *readMemory;

	Assert(storageRead != NULL);
	Assert(relationName != NULL);
	Assert(storageAttributes != NULL);

	/* UNDONE: Range check maxBufferLen */
	/* UNDONE: Range check fields in storageAttributes */

	MemSet(storageRead, 0, sizeof(AppendOnlyStorageRead));

	storageRead->maxBufferLen = maxBufferLen;
	storageRead->memoryContext =
		(memoryContext == NULL) ? CurrentMemoryContext : memoryContext;

	/* Allocate the name copy and the read buffer in the session context. */
	savedContext = MemoryContextSwitchTo(storageRead->memoryContext);

	memcpy(&storageRead->storageAttributes,
		   storageAttributes,
		   sizeof(AppendOnlyStorageAttributes));

	nameLenWithNul = strlen(relationName) + 1;
	storageRead->relationName = (char *) palloc(nameLenWithNul);
	memcpy(storageRead->relationName, relationName, nameLenWithNul);

	storageRead->title = title;

	storageRead->minimumHeaderLen =
		AppendOnlyStorageFormat_RegularHeaderLenNeeded(
									storageRead->storageAttributes.checksum);

	/*
	 * Initialize BufferedRead.
	 */
	storageRead->largeReadLen = 2 * storageRead->maxBufferLen;

	readMemoryLen = BufferedReadMemoryLen(storageRead->maxBufferLen,
										  storageRead->largeReadLen);

	Assert(CurrentMemoryContext == storageRead->memoryContext);
	readMemory = (uint8 *) palloc(readMemoryLen);

	BufferedReadInit(&storageRead->bufferedRead,
					 readMemory,
					 readMemoryLen,
					 storageRead->maxBufferLen,
					 storageRead->largeReadLen,
					 relationName);

	elogif(Debug_appendonly_print_scan || Debug_appendonly_print_read_block, LOG,
		   "Append-Only Storage Read initialize for table '%s' "
		   "(compression = %s, compression level %d, maximum buffer length %d, large read length %d)",
		   storageRead->relationName,
		   (storageRead->storageAttributes.compress ? "true" : "false"),
		   storageRead->storageAttributes.compressLevel,
		   storageRead->maxBufferLen,
		   storageRead->largeReadLen);

	/* No segment file is open yet. */
	storageRead->file = -1;

	MemoryContextSwitchTo(savedContext);

	storageRead->isActive = true;
}
/*
 * Skip zero padding to next page boundary, if necessary.
 *
 * This function is called when the file system block we are scanning has
 * no more valid data but instead is padded with zero's from the position
 * we are currently in until the end of the block.
 *
 * The distance from the next read position to the next multiple of the
 * safe file-system write size is computed.  Nothing is skipped when the
 * safe write size is zero or the read position already sits on a boundary.
 * Otherwise the remainder is read through (and discarded) when skipLen is
 * -1, or when the remainder is smaller than skipLen.
 *
 * Raises an ERROR if fewer bytes than the remainder could be read.
 */
static void
AppendOnlyStorageRead_DoSkipPadding(AppendOnlyStorageRead *storageRead,
									int32 skipLen)
{
	int32		pageSize = storageRead->storageAttributes.safeFSWriteSize;
	int64		readPos;
	int64		boundaryPos;
	int32		padLen;
	uint8	   *padBuffer;
	int32		gotLen;

	/* early exit if no pad used */
	if (pageSize == 0)
		return;

	/* Round the read position up to the next page boundary. */
	readPos = BufferedReadNextBufferPosition(&storageRead->bufferedRead);
	boundaryPos = ((readPos + pageSize - 1) / pageSize) * pageSize;
	padLen = (int32) (boundaryPos - readPos);

	/* Already on a boundary: nothing to skip. */
	if (padLen <= 0)
		return;

	/*
	 * With an explicit skipLen, only skip when the rest of the page is too
	 * small to hold skipLen bytes.  skipLen == -1 always skips to the end
	 * of the page.
	 */
	if (skipLen != -1 && padLen >= skipLen)
		return;

	/*
	 * Read through the remainder.
	 */
	padBuffer = BufferedReadGetNextBuffer(&storageRead->bufferedRead,
										  padLen,
										  &gotLen);
	if (padBuffer == NULL)
		gotLen = 0;

	/*
	 * Since our file EOF should always be a multiple of the file-system
	 * page, we do not expect a short read here.  (padLen is positive at
	 * this point, so a NULL buffer is also caught by this comparison.)
	 */
	if (padLen != gotLen)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("Unexpected end of file.  Expected to read %d bytes after position " INT64_FORMAT " but found %d bytes (bufferCount  " INT64_FORMAT ")\n",
						padLen,
						readPos,
						gotLen,
						storageRead->bufferCount)));

	/*
	 * UNDONE: For verification purposes, we should verify the
	 * remainder is all zeroes.
	 */

	elogif(Debug_appendonly_print_scan, LOG,
		   "Append-only scan skipping zero padded remainder for table '%s' (nextReadPosition = " INT64_FORMAT ", safeWriteRemainder = %d)",
		   storageRead->relationName,
		   readPos,
		   padLen);
}
/*
 * AppendOnlyStorageFormat_MakeBulkDenseContentHeader
 *
 * Fill in a BulkDenseContent block header, and its extension header, in
 * the memory starting at headerPtr.
 *
 * headerPtr		- destination for the header; must not be NULL
 * usingChecksums	- when true, room for two checksums follows the regular
 *					  header and the block header checksums are added
 * hasFirstRowNum	- when true, firstRowNum is stored with the header
 * version			- format version passed to the checksum / round-up
 *					  routines
 * firstRowNum		- the optional first row number
 * executorKind		- executor block kind recorded in the header
 * rowCount			- row count, recorded in the extension header
 * dataLength		- uncompressed data length
 * compressedLength - compressed data length; a value greater than zero
 *					  marks the block as compressed
 *
 * In assert-enabled builds the finished header is read back and every
 * field is compared with the input; any mismatch raises an ERROR.
 */
void
AppendOnlyStorageFormat_MakeBulkDenseContentHeader(
	uint8			*headerPtr,
	bool			usingChecksums,
	bool			hasFirstRowNum,
	int				version,
	int64			firstRowNum,
	int				executorKind,
	int				rowCount,
	int32			dataLength,
	int32			compressedLength)
{
	AOBulkDenseContentHeader 	*blockHeader;
	int32						firstHeaderAndChecksumsLen;
	AOBulkDenseContentHeaderExt *extHeader;

	bool			isCompressed;

	Assert(headerPtr != NULL);
	
	blockHeader = (AOBulkDenseContentHeader*)headerPtr;
	firstHeaderAndChecksumsLen =		
							AoHeader_RegularSize + 
							(usingChecksums ? 2 * sizeof(pg_crc32) : 0);

	/*
	 * The extension header is in the data portion with first row number.
	 */
	extHeader = (AOBulkDenseContentHeaderExt*)(headerPtr + firstHeaderAndChecksumsLen);

	elogif(Debug_appendonly_print_storage_headers, LOG,
			 "Append-Only storage make Bulk Dense Content header parameters: wantChecksum = %s, hasFirstRowNum %s, executorKind = %d, "
			 "rowCount = %d, dataLength %d, compressedLength %d",
			 (usingChecksums ? "true" : "false"),
			 (hasFirstRowNum ? "true" : "false"),
			 executorKind,
			 rowCount,
			 dataLength,
			 compressedLength);
	
	/* Zero out whole header */
	AOBulkDenseContentHeaderInit_Init(blockHeader);
	
	AOBulkDenseContentHeaderInit_headerKind(blockHeader,AoHeaderKind_BulkDenseContent);
	AOBulkDenseContentHeaderInit_executorBlockKind(blockHeader,executorKind);
	AOBulkDenseContentHeaderInit_dataLength(blockHeader,dataLength);
	AOBulkDenseContentHeaderInit_hasFirstRowNum(blockHeader,hasFirstRowNum);
	
	/* The compressed length is only recorded for compressed blocks. */
	isCompressed = (compressedLength > 0);
	if (isCompressed)
		AOBulkDenseContentHeaderInit_compressedLength(blockHeader,compressedLength);

	/* Zero out whole extension */
	AOBulkDenseContentHeaderExtInit_Init(extHeader);
	AOBulkDenseContentHeaderExtInit_largeRowCount(extHeader,rowCount);
	
	/*
	 * Add the optional firstRowNum.
	 *
	 * NOTE: This is not part of the 8-byte (64-bit) header because it is so big.
	 * NOTE: And, it is not covered by the header checksum because in order to
	 * NOTE: determine if we should checksum more data we would need to examine
	 * NOTE: the header data not verified by checksum yet...
	 *
	 * So, the firstRowNum is extra data between the header (and checksums) and
	 * the content.  We must add it before computing the checksum.
	 */
	if (hasFirstRowNum)
	{
		AppendOnlyStorageFormat_AddFirstRowNum(
											headerPtr,
											usingChecksums,
											firstRowNum);
	}
	
	if (usingChecksums)
	{
		AppendOnlyStorageFormat_AddBlockHeaderChecksums(
											headerPtr,
											isCompressed,
											hasFirstRowNum,
											version,
											dataLength,
											compressedLength);
	}
	else
	{
		elogif(Debug_appendonly_print_storage_headers, LOG,
				 "Append-Only storage make Bulk Dense Content header result: "
				 "bulkdensecontent_bytes_0_3 0x%X, bulkdensecontent_bytes_4_7 0x%X "
				 "bulkdensecontent_ext_bytes_0_3 0x%X, bulkdensecontent_ext_bytes_4_7 0x%X ",
				 blockHeader->bulkdensecontent_bytes_0_3,
				 blockHeader->bulkdensecontent_bytes_4_7,
				 extHeader->bulkdensecontent_ext_bytes_0_3,
				 extHeader->bulkdensecontent_ext_bytes_4_7);
	}

#ifdef USE_ASSERT_CHECKING
	{
		/*
		 * Self-check: parse the header we just built and verify that every
		 * field round-trips to the value we were given.
		 */
		int 			checkHeaderLen;
		int32			checkLength;
		int32			checkBlockLimitLen;
		int32			checkOverallBlockLen;
		int32			checkOffset;
		int32			checkUncompressedLen;
		int 			checkExecutorBlockKind;
		bool			checkHasFirstRowNum;
		int64			checkFirstRowNum;
		int 			checkRowCount;
		bool			checkIsCompressed;
		int32			checkCompressedLen;

		AOHeaderCheckError checkError;

		/* Regular header (+ checksums) + extension header (+ firstRowNum) */
		checkHeaderLen = firstHeaderAndChecksumsLen +
						 AoHeader_RegularSize;
		if (hasFirstRowNum)
			checkHeaderLen += sizeof(int64);

		/* The stored content is the compressed form when there is one. */
		if (compressedLength == 0)
		{
			checkLength = dataLength;
		}
		else
		{
			checkLength = compressedLength;
		}
		checkBlockLimitLen = checkHeaderLen +
							 AOStorage_RoundUp(checkLength, version);

		checkError =
			AppendOnlyStorageFormat_GetBulkDenseContentHeaderInfo(
														headerPtr,
														checkHeaderLen,
														usingChecksums,
														checkBlockLimitLen,
														&checkOverallBlockLen,
														&checkOffset,
														&checkUncompressedLen,
														&checkExecutorBlockKind,
														&checkHasFirstRowNum,
														version,
														&checkFirstRowNum,
														&checkRowCount,
														&checkIsCompressed,
														&checkCompressedLen);
		if (checkError != AOHeaderCheckOk)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Header check error %d, detail '%s'",
							(int)checkError,
							AppendOnlyStorageFormat_GetHeaderCheckErrorStr())));

		if (checkOverallBlockLen != checkBlockLimitLen)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found block length %d, expected %d",
							checkOverallBlockLen,
							checkBlockLimitLen)));
		if (checkOffset != checkHeaderLen)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found data offset %d, expected %d",
							checkOffset,
							checkHeaderLen)));
		if (checkUncompressedLen != dataLength)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found uncompressed length %d, expected %d",
							checkUncompressedLen,
							dataLength)));
		if (checkExecutorBlockKind != executorKind)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found executor kind %d, expected %d",
							checkExecutorBlockKind,
							executorKind)));
		if (checkHasFirstRowNum != hasFirstRowNum)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found has first row number flag %s, expected %s",
							(checkHasFirstRowNum ? "true" : "false"),
							(hasFirstRowNum ? "true" : "false"))));
		if (hasFirstRowNum)
		{
			if (checkFirstRowNum != firstRowNum)
				ereport(ERROR,
						(errmsg("Problem making append-only storage header of type bulk dense content. "
								"Found first row number " INT64_FORMAT ", expected " INT64_FORMAT,
								checkFirstRowNum,
								firstRowNum)));
		}
		if (checkRowCount != rowCount)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found row count %d, expected %d",
							checkRowCount,
							rowCount)));
		if (checkIsCompressed != isCompressed)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found is compressed flag %s, expected %s",
							(checkIsCompressed ? "true" : "false"),
							(isCompressed ? "true" : "false"))));
		/* Report the value that was actually compared: compressedLength. */
		if (checkCompressedLen != compressedLength)
			ereport(ERROR,
					(errmsg("Problem making append-only storage header of type bulk dense content. Found compressed length %d, expected %d",
							checkCompressedLen,
							compressedLength)));
	}
#endif
}
/* Example #28 (0) */
/*
 * AOCSCompact
 *
 * Compacts segment files of a column-oriented append-only relation.
 *
 *	aorel			 - relation to compact; asserted to be AO column-oriented.
 *	compaction_segno - integer list of segment file numbers eligible for
 *					   compaction; segment files not in the list are skipped.
 *	insert_segno	 - segment file opened for inserts via aocs_insert_init();
 *					   it is never compacted itself.
 *	isFull			 - forwarded to AppendOnlyCompaction_ShouldCompact().
 *
 * In non-utility mode, all compaction segment files should be marked as
 * in-use/in-compaction in the appendonlywriter.c code. If set, the
 * insert_segno should also be marked as in-use.
 *
 * NOTE(review): the previous header stated that a negative insert segno
 * restricts the work to truncate-to-eof operations, yet the function asserts
 * insert_segno >= 0 -- confirm which contract callers rely on.
 *
 * The caller is required to hold either an AccessExclusiveLock (vacuum full)
 * or a ShareLock on the relation.
 */
void
AOCSCompact(Relation aorel,
			List *compaction_segno,
			int insert_segno,
			bool isFull)
{
	Snapshot	snapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));
	const char *relname;
	AOCSFileSegInfo **allSegs;
	AOCSInsertDesc inserter = NULL;
	int			nsegs;
	int			idx;

	Assert(RelationIsAoCols(aorel));
	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(insert_segno >= 0);

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Collect the descriptors of every segment file of the relation. */
	allSegs = GetAllAOCSFileSegInfo(aorel, snapshot, &nsegs);

	if (insert_segno >= 0)
		inserter = aocs_insert_init(aorel, insert_segno, false);

	for (idx = 0; idx < nsegs; idx++)
	{
		int			segno = allSegs[idx]->segno;
		AOCSFileSegInfo *seginfo;

		/*
		 * Only segment files named by the caller are candidates, and the
		 * segment file we are inserting into can never be compacted.
		 */
		if (!list_member_int(compaction_segno, segno) ||
			segno == insert_segno)
			continue;

		/*
		 * Take the transaction write-lock on the segment file without
		 * waiting; a segment file that is busy is simply skipped.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		if (LockRelationAppendOnlySegmentFile(&aorel->rd_node,
											  segno,
											  AccessExclusiveLock,
											  /* dontWait */ true)
			== LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "compaction skips AOCS segfile %d, "
				 "relation %s", segno, relname);
			continue;
		}

		/* Look the entry up again now that the write lock is held. */
		seginfo = GetAOCSFileSegInfo(aorel, snapshot, segno);

		/*
		 * The entry was just returned by the "all" lookup, so a miss here is
		 * unexpected; report it loudly to help trouble shooting (possibly
		 * index corruption?).
		 */
		if (seginfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		if (AppendOnlyCompaction_ShouldCompact(aorel,
											   seginfo->segno,
											   seginfo->total_tupcount,
											   isFull,
											   snapshot))
		{
			AOCSSegmentFileFullCompaction(aorel, inserter, seginfo, snapshot);
		}

		pfree(seginfo);
	}

	if (inserter)
		aocs_insert_finish(inserter);

	if (allSegs)
	{
		FreeAllAOCSSegFileInfo(allSegs, nsegs);
		pfree(allSegs);
	}

	UnregisterSnapshot(snapshot);
}
/* Example #29 (0) */
/*
 * BufferedAppendWrite
 *
 * Perform a large write i/o: hand the pending large-write buffer of
 * 'bufferedAppend' to MirroredAppendOnly_Append(), then advance
 * largeWritePosition by largeWriteLen and reset largeWriteLen to zero.
 *
 * Raises ERROR when the append reports a non-zero primary error.  In
 * assert-enabled builds it first checks that the file's current position
 * equals largeWritePosition and raises ERROR on a mismatch.
 */
static void
BufferedAppendWrite(BufferedAppend *bufferedAppend)
{
	uint8	   *cursor;
	int32		remaining;

	Assert(bufferedAppend->largeWriteLen > 0);

	remaining = bufferedAppend->largeWriteLen;
	cursor = bufferedAppend->largeWriteMemory;

#ifdef USE_ASSERT_CHECKING
	{
		int64		filePos = FileNonVirtualCurSeek(bufferedAppend->file);

		if (filePos < 0)
		{
			ereport(ERROR, (errcode_for_file_access(),
							errmsg("unable to get current position in table \"%s\" for file \"%s\" (errcode %d)",
								   bufferedAppend->relationName,
								   bufferedAppend->filePathName,
								   errno)));
		}

		if (filePos != bufferedAppend->largeWritePosition)
		{
			ereport(ERROR, (errcode_for_file_access(),
							errmsg("Current position mismatch actual "
								   INT64_FORMAT ", expected " INT64_FORMAT " in table \"%s\" for file \"%s\"",
								   filePos,
								   bufferedAppend->largeWritePosition,
								   bufferedAppend->relationName,
								   bufferedAppend->filePathName)));
		}
	}
#endif

	while (remaining > 0)
	{
		int			primaryError;
		bool		mirrorDataLossOccurred;
		int			written;

		MirroredAppendOnly_Append(&bufferedAppend->mirroredOpen,
								  (char *) cursor,
								  remaining,
								  &primaryError,
								  &mirrorDataLossOccurred);

		if (primaryError != 0)
		{
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("Could not write in table \"%s\" to segment file '%s': %m", 
							bufferedAppend->relationName,
							bufferedAppend->filePathName)));
		}

		elogif(Debug_appendonly_print_append_block, LOG,
			   "Append-Only storage write: table '%s', segment file '%s', write position " INT64_FORMAT ", "
			   "writeLen %d (equals large write length %d is %s)",
			   bufferedAppend->relationName,
			   bufferedAppend->filePathName,
			   bufferedAppend->largeWritePosition,
			   remaining,
			   bufferedAppend->largeWriteLen,
			   (remaining == bufferedAppend->largeWriteLen ? "true" : "false"));

		/* The whole request is counted as written in a single pass. */
		written = remaining;
		cursor += written;
		remaining -= written;
	}

	/* Account for the flushed bytes and mark the buffer as empty. */
	bufferedAppend->largeWritePosition += bufferedAppend->largeWriteLen;
	bufferedAppend->largeWriteLen = 0;
}
/*
 * AppendOnlyStorageFormat_MakeSmallContentHeader
 *
 * Fill in a Small Content block header at 'headerPtr'.
 *
 *	headerPtr		 - start of the header; must not be NULL.
 *	usingChecksums	 - when true, block header checksums are added.
 *	hasFirstRowNum	 - when true, 'firstRowNum' is stored with the header.
 *	version			 - forwarded to the checksum routine.
 *	firstRowNum		 - first row number; used only if hasFirstRowNum.
 *	executorKind	 - executor block kind recorded in the header.
 *	rowCount		 - row count recorded in the header.
 *	dataLength		 - data length recorded in the header.
 *	compressedLength - recorded only when greater than zero, in which case
 *					   the block is treated as compressed.
 */
void
AppendOnlyStorageFormat_MakeSmallContentHeader(
	uint8			*headerPtr,
	bool			usingChecksums,
	bool			hasFirstRowNum,
	int				version,
	int64			firstRowNum,
	int				executorKind,
	int				rowCount,
	int32			dataLength,
	int32			compressedLength)
{
	const bool		isCompressed = (compressedLength > 0);
	AOSmallContentHeader *hdr;

	Assert(headerPtr != NULL);

	hdr = (AOSmallContentHeader *) headerPtr;

	elogif(Debug_appendonly_print_storage_headers, LOG,
		   "Append-Only storage make Small Content header parameters: wantChecksum = %s, hasFirstRowNum %s, executorKind = %d, "
		   "rowCount = %d, dataLength %d, compressedLength %d",
		   (usingChecksums ? "true" : "false"),
		   (hasFirstRowNum ? "true" : "false"),
		   executorKind,
		   rowCount,
		   dataLength,
		   compressedLength);

	/* Start from an all-zero header, then set the individual fields. */
	AOSmallContentHeaderInit_Init(hdr);

	AOSmallContentHeaderInit_headerKind(hdr, AoHeaderKind_SmallContent);
	AOSmallContentHeaderInit_executorBlockKind(hdr, executorKind);
	AOSmallContentHeaderInit_rowCount(hdr, rowCount);
	AOSmallContentHeaderInit_dataLength(hdr, dataLength);
	AOSmallContentHeaderInit_hasFirstRowNum(hdr, hasFirstRowNum);

	if (isCompressed)
	{
		AOSmallContentHeaderInit_compressedLength(hdr, compressedLength);
	}

	/*
	 * Optional firstRowNum.
	 *
	 * It is too big to live inside the 8-byte (64-bit) header, and it is not
	 * covered by the header checksum: deciding whether more data needs to be
	 * checksummed would require looking at header data that has not been
	 * verified yet.
	 *
	 * It therefore sits as extra data between the header (and checksums) and
	 * the content, and has to be in place before the checksums are computed.
	 */
	if (hasFirstRowNum)
	{
		AppendOnlyStorageFormat_AddFirstRowNum(headerPtr,
											   usingChecksums,
											   firstRowNum);
	}

	if (!usingChecksums)
	{
		/* No checksums requested: just report the finished header words. */
		elogif(Debug_appendonly_print_storage_headers, LOG,
			   "Append-Only storage make Small Content header result: smallcontent_bytes_0_3 0x%X, smallcontent_bytes_4_7 0x%X",
			   hdr->smallcontent_bytes_0_3,
			   hdr->smallcontent_bytes_4_7);
		return;
	}

	AppendOnlyStorageFormat_AddBlockHeaderChecksums(headerPtr,
													isCompressed,
													hasFirstRowNum,
													version,
													dataLength,
													compressedLength);
}