Example #1
0
/*
 * Fills in the relation statistics for an append-only relation.
 *
 *	This information is used to update the reltuples and relpages information
 *	in pg_class. reltuples is the same as "pg_aoseg_<oid>:tupcount"
 *	column and we simulate relpages by subdividing the eof value
 *	("pg_aoseg_<oid>:eof") over the defined page size.
 */
void
vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot,
							 BlockNumber *rel_pages, double *rel_tuples,
							 bool *relhasindex)
{
	FileSegTotals *fstotal;
	BlockNumber nblocks;
	char	   *relname;
	double		num_tuples;
	double		totalbytes;
	double		eof;
	int64       hidden_tupcount;
	AppendOnlyVisimap visimap;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	/* get updated statistics from the pg_aoseg table */
	if (RelationIsAoRows(aorel))
	{
		fstotal = GetSegFilesTotals(aorel, snapshot);
	}
	else
	{
		Assert(RelationIsAoCols(aorel));
		fstotal = GetAOCSSSegFilesTotals(aorel, snapshot);
	}

	/* calculate the values we care about */
	eof = (double)fstotal->totalbytes;
	num_tuples = (double)fstotal->totaltuples;
	totalbytes = eof;
	nblocks = (uint32)RelationGuessNumberOfBlocks(totalbytes);

	AppendOnlyVisimap_Init(&visimap,
						   aorel->rd_appendonly->visimaprelid,
						   aorel->rd_appendonly->visimapidxid,
						   AccessShareLock,
						   snapshot);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	num_tuples -= hidden_tupcount;
	Assert(num_tuples > -1.0);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	elogif (Debug_appendonly_print_compaction, LOG,
			"Gather statistics after vacuum for append-only relation %s: "
			"page count %d, tuple count %f",
			relname,
			nblocks, num_tuples);

	*rel_pages = nblocks;
	*rel_tuples = num_tuples;
	*relhasindex = aorel->rd_rel->relhasindex;

	ereport(elevel,
			(errmsg("\"%s\": found %.0f rows in %u pages.",
					relname, num_tuples, nblocks)));
	pfree(fstotal);
}
Example #2
0
/*
 * Given a WITH(...) clause and no other column encoding directives -- such as
 * in the case of CREATE TABLE WITH () AS SELECT -- fill in the column encoding
 * catalog entries for that relation.
 */
void
AddDefaultRelationAttributeOptions(Relation rel, List *options)
{
	Datum opts;
	AttrNumber attno;
	List *ce;

	/* only supported on AOCO at this stage */
	if (!RelationIsAoCols(rel))
		return;

 	ce = form_default_storage_directive(options);
	if (!ce)
		ce = default_column_encoding_clause();

	ce = transformStorageEncodingClause(ce);

	opts = transformRelOptions(PointerGetDatum(NULL), ce, true, false);

	for (attno = 1; attno <= RelationGetNumberOfAttributes(rel); attno++)
		add_attribute_encoding_entry(RelationGetRelid(rel),
									 attno,
									 opts);
	CommandCounterIncrement();
}
Example #3
0
/*
 * Returns true if the relation has no tuples.  Prepare phase of
 * compaction invokes this function on each QE.
 *
 * Examples of empty tables:
 * 1. parent of a partitioned table
 * 2. table that is created but no tuples have been inserted yet
 * 3. table from which all existing tuples are deleted and the table
 * is vacuumed.  This is a special case in which pg_aoseg_<oid> has
 * non-zero number of rows but tupcount value is zero for all rows.
 */
bool
AppendOnlyCompaction_IsRelationEmpty(Relation aorel)
{
	AppendOnlyEntry *aoEntry;
	Relation		pg_aoseg_rel;
	TupleDesc		pg_aoseg_dsc;
	HeapTuple		tuple;
	HeapScanDesc	aoscan;
	int				Anum_tupcount;
	bool empty = true;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);
	pg_aoseg_rel = heap_open(aoEntry->segrelid, AccessShareLock);
	pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel);
	aoscan = heap_beginscan(pg_aoseg_rel, SnapshotNow, 0, NULL);
	Anum_tupcount = RelationIsAoRows(aorel)? Anum_pg_aoseg_tupcount: Anum_pg_aocs_tupcount;
	while ((tuple = heap_getnext(aoscan, ForwardScanDirection)) != NULL &&
		   empty)
	{
		if (0 < fastgetattr(tuple, Anum_tupcount,
							pg_aoseg_dsc, NULL))
			empty = false;
	}
	heap_endscan(aoscan);
	heap_close(pg_aoseg_rel, AccessShareLock);
	return empty;
}
Example #4
0
File: dbsize.c Project: pf-qiu/gpdb
/*
 * calculate size of (one fork of) a relation
 *
 * Iterator over all files belong to the relation and do stat.
 * The obviously better way is to use glob.  For whatever reason,
 * glob is extremely slow if there are lots of relations in the
 * database.  So we handle all cases, instead. 
 *
 * Note: we can safely apply this to temp tables of other sessions, so there
 * is no check here or at the call sites for that.
 */
static int64
calculate_relation_size(Relation rel, ForkNumber forknum)
{
	int64		totalsize = 0;
	char	   *relationpath;
	char		pathname[MAXPGPATH];
	unsigned int segcount = 0;

	relationpath = relpathbackend(rel->rd_node, rel->rd_backend, forknum);

if (RelationIsHeap(rel))
{
	/* Ordinary relation, including heap and index.
	 * They take form of relationpath, or relationpath.%d
	 * There will be no holes, therefore, we can stop when
	 * we reach the first non-existing file.
	 */
	for (segcount = 0;; segcount++)
	{
		struct stat fst;

		CHECK_FOR_INTERRUPTS();

		if (segcount == 0)
			snprintf(pathname, MAXPGPATH, "%s",
					 relationpath);
		else
			snprintf(pathname, MAXPGPATH, "%s.%u",
					 relationpath, segcount);

		if (stat(pathname, &fst) < 0)
		{
			if (errno == ENOENT)
				break;
			else
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file %s: %m", pathname)));
		}
		totalsize += fst.st_size;
	}
}
/* AO tables don't have any extra forks. */
else if (forknum == MAIN_FORKNUM)
{
	if (RelationIsAoRows(rel))
	{
		totalsize = GetAOTotalBytes(rel, GetActiveSnapshot());
	}
	else if (RelationIsAoCols(rel))
	{
		totalsize = GetAOCSTotalBytes(rel, GetActiveSnapshot(), true);
	}
}

	/* RELSTORAGE_VIRTUAL has no space usage */
	return totalsize;
}
Example #5
0
/**
 * Given the oid of a relation, this method calculates reltuples, relpages. This only looks up
 * local information (on master or segments). It produces meaningful values for AO and
 * heap tables and returns [0.0,0.0] for all other relations.
 * Input: 
 * 	relationoid
 * Output:
 * 	array of two values [reltuples,relpages]
 */
Datum
gp_statistics_estimate_reltuples_relpages_oid(PG_FUNCTION_ARGS)
{
	
	float4		relpages = 0.0;		
	float4		reltuples = 0.0;			
	Oid			relOid = PG_GETARG_OID(0);
	Datum		values[2];
	ArrayType   *result;
	
	Relation rel = try_relation_open(relOid, AccessShareLock, false);

	if (rel != NULL)
	{
		if (rel->rd_rel->relkind == RELKIND_RELATION)
		{
			if (RelationIsHeap(rel))
			{
				gp_statistics_estimate_reltuples_relpages_heap(rel, &reltuples, &relpages);
			}
			else if (RelationIsAoRows(rel))
			{
				gp_statistics_estimate_reltuples_relpages_ao_rows(rel, &reltuples, &relpages);
			}
			else if	(RelationIsAoCols(rel))
			{
				gp_statistics_estimate_reltuples_relpages_ao_cs(rel, &reltuples, &relpages);
			}
		}
		else if (rel->rd_rel->relkind == RELKIND_INDEX)
		{
			reltuples = 1.0;
			relpages = RelationGetNumberOfBlocks(rel);
		}
		else
		{
			/**
			 * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1.
			 */
		}
		relation_close(rel, AccessShareLock);
	}
	else
	{
		/**
		 * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1.
		 */
	}
	
	values[0] = Float4GetDatum(reltuples);
	values[1] = Float4GetDatum(relpages);

	result = construct_array(values, 2,
					FLOAT4OID,
					sizeof(float4), true, 'i');

	PG_RETURN_ARRAYTYPE_P(result);
}
Example #6
0
static void gp_statistics_estimate_reltuples_relpages_ao_cs(Relation rel, float4 *reltuples, float4 *relpages)
{
	AOCSFileSegInfo	**aocsInfo = NULL;
	int				nsegs = 0;
	double			totalBytes = 0;
	AppendOnlyEntry *aoEntry;
	int64 hidden_tupcount;
	AppendOnlyVisimap visimap;

	/**
	 * Ensure that the right kind of relation with the right type of storage is passed to us.
	 */
	Assert(rel->rd_rel->relkind == RELKIND_RELATION);
	Assert(RelationIsAoCols(rel));
	
	*reltuples = 0.0;
	*relpages = 0.0;
	
    /* get table level statistics from the pg_aoseg table */
	aoEntry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
	aocsInfo = GetAllAOCSFileSegInfo(rel, aoEntry, SnapshotNow, &nsegs);
	if (aocsInfo)
	{
		int i = 0;
		int j = 0;
		for(i = 0; i < nsegs; i++)
		{
			for(j = 0; j < RelationGetNumberOfAttributes(rel); j++)
			{
				AOCSVPInfoEntry *e = getAOCSVPEntry(aocsInfo[i], j);
				Assert(e);
				totalBytes += e->eof_uncompressed;
			}

			/* Do not include tuples from an awaiting drop segment file */
			if (aocsInfo[i]->state != AOSEG_STATE_AWAITING_DROP)
			{
				*reltuples += aocsInfo[i]->total_tupcount;
			}
		}
		/**
		 * The planner doesn't understand AO's blocks, so need this method to try to fudge up a number for
		 * the planner. 
		 */
		*relpages = RelationGuessNumberOfBlocks(totalBytes);
	}

	AppendOnlyVisimap_Init(&visimap, aoEntry->visimaprelid, aoEntry->visimapidxid, AccessShareLock, SnapshotNow);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	(*reltuples) -= hidden_tupcount;

	pfree(aoEntry);
	  
	return;
}
Example #7
0
/*
 * calculate size of a relation
 *
 * Iterator over all files belong to the relation and do stat.
 * The obviously better way is to use glob.  For whatever reason,
 * glob is extremely slow if there are lots of relations in the
 * database.  So we handle all cases, instead. 
 */
static int64
calculate_relation_size(Relation rel)
{
	int64		totalsize = 0;
	char	   *relationpath;
	char		pathname[MAXPGPATH];

    struct stat fst;
    int i;

	relationpath = relpath(rel->rd_node);

    if(RelationIsHeap(rel))
    {
        /* Ordinary relation, including heap and index.
         * They take form of relationpath, or relationpath.%d
         * There will be no holes, therefore, we can stop we
         * we reach the first non-exist file.
         */
        for(i=0; ; ++i)
        {
            if (i==0)
                snprintf(pathname, MAXPGPATH, "%s", relationpath); 
            else
                snprintf(pathname, MAXPGPATH, "%s.%d", relationpath, i);

            if (stat(pathname, &fst) >= 0)
                totalsize += fst.st_size;
            else
            {
                if (errno == ENOENT)
                    break;
                else
                    ereport(ERROR, (errcode_for_file_access(), 
                                    errmsg("could not stat file %s: %m", pathname)
                                ));
            }
        }
    }
	else if (RelationIsAoRows(rel))
		totalsize = GetAOTotalBytes(rel, SnapshotNow);
	else if (RelationIsAoCols(rel))
		totalsize = GetAOCSTotalBytes(rel, SnapshotNow);
           
    /* RELSTORAGE_VIRTUAL has no space usage */
    return totalsize;
}
Example #8
0
/*
 *	Compute the on-disk size of files for the relation according to the
 *	stat function, including heap data, index data, toast data, aoseg data,
 *  aoblkdir data, and aovisimap data.
 */
static int64
calculate_total_relation_size(Oid Relid)
{
	Relation	heapRel;
	Oid			toastOid;
	AppendOnlyEntry *aoEntry = NULL;
	int64		size;
	ListCell   *cell;

	heapRel = try_relation_open(Relid, AccessShareLock, false);

	if (!RelationIsValid(heapRel))
		return 0;

	toastOid = heapRel->rd_rel->reltoastrelid;

	if (RelationIsAoRows(heapRel) || RelationIsAoCols(heapRel))
		aoEntry = GetAppendOnlyEntry(Relid, SnapshotNow);
	
	/* Get the heap size */
	if (Relid == 0 || heapRel->rd_node.relNode == 0)
		size = 0;
	else
		size = calculate_relation_size(heapRel); 

	/* Include any dependent indexes */
	if (heapRel->rd_rel->relhasindex)
	{
		List	   *index_oids = RelationGetIndexList(heapRel);

		foreach(cell, index_oids)
		{
			Oid			idxOid = lfirst_oid(cell);
			Relation	iRel;

			iRel = try_relation_open(idxOid, AccessShareLock, false);

			if (RelationIsValid(iRel))
			{
				size += calculate_relation_size(iRel); 

				relation_close(iRel, AccessShareLock);
			}
		}
Example #9
0
/**
 * Drops a segment file.
 *
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel,
							   int segno)
{
	ItemPointerData persistentTid;
	int64		persistentSerialNum;
	int			pseudoSegNo;
	int			col;

	Assert(RelationIsAoCols(aorel));

	for (col = 0; col < RelationGetNumberOfAttributes(aorel); col++)
	{
		pseudoSegNo = (col * AOTupleId_MultiplierSegmentFileNum) + segno;

		if (!ReadGpRelationNode(
								aorel->rd_rel->reltablespace,
								aorel->rd_rel->relfilenode,
								pseudoSegNo,
								&persistentTid,
								&persistentSerialNum))
		{
			/* There is nothing to drop */
			return;
		}

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: "
			   "segno %d",
			   pseudoSegNo);

		MirroredFileSysObj_ScheduleDropAppendOnlyFile(
													  &aorel->rd_node,
													  pseudoSegNo,
													  RelationGetRelationName(aorel),
													  &persistentTid,
													  persistentSerialNum);

		DeleteGpRelationNodeTuple(aorel,
								  pseudoSegNo);
	}
}
Example #10
0
/*
 * Drops a segment file.
 *
 * Actually, we just truncate the segfile to 0 bytes, to reclaim the space.
 * Before GPDB 6, we used to remove the file, but with WAL replication, we
 * no longer have a convenient function to remove a single segment of a
 * relation. An empty file is as almost as good as a non-existent file. If
 * the relation is dropped later, the code in mdunlink() will remove all
 * segments, including any empty ones we've left behind.
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel,
							   int segno)
{
	int			col;

	Assert(RelationIsAoCols(aorel));

	for (col = 0; col < RelationGetNumberOfAttributes(aorel); col++)
	{
		char		filenamepath[MAXPGPATH];
		int			pseudoSegNo;
		File		fd;

		/* Open and truncate the relation segfile */
		MakeAOSegmentFileName(aorel, segno, col, &pseudoSegNo, filenamepath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: "
			   "segno %d",
			   pseudoSegNo);

		fd = OpenAOSegmentFile(aorel, filenamepath, pseudoSegNo, 0);
		if (fd >= 0)
		{
			TruncateAOSegmentFile(fd, aorel, pseudoSegNo, 0);
			CloseAOSegmentFile(fd);
		}
		else
		{
			/*
			 * The file we were about to drop/truncate didn't exist. That's normal,
			 * for example, if a column is added with ALTER TABLE ADD COLUMN.
			 */
			elog(DEBUG1, "could not truncate segfile %s, because it does not exist", filenamepath);
		}
	}
}
Example #11
0
File: aoblkdir.c Project: 50wu/gpdb
void
AlterTableCreateAoBlkdirTable(Oid relOid, bool is_part_child)
{
	Relation	rel;
	TupleDesc	tupdesc;
	IndexInfo  *indexInfo;
	Oid			classObjectId[3];
	int16		coloptions[3];

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel)) {
		heap_close(rel, NoLock);
		return;
	}

	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(4, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "segno",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "columngroup_no",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "first_row_no",
					   INT8OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 4,
					   "minipage",
					   VARBITOID,
					   -1, 0);

	/*
	 * We don't want any toast columns here.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';
    /* TODO (dmeister): In the next line, the index should have been 3. 
     * Therefore the minipage might be toasted.
     */
	tupdesc->attrs[2]->attstorage = 'p'; 

	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 3;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_KeyAttrNumbers[2] = 3;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_Concurrent = false;
	
	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT4_BTREE_OPS_OID;
	classObjectId[2] = INT8_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;
	coloptions[2] = 0;

	(void) CreateAOAuxiliaryTable(rel,
			"pg_aoblkdir",
			RELKIND_AOBLOCKDIR,
			tupdesc, indexInfo, classObjectId, coloptions);

	heap_close(rel, NoLock);
}
Example #12
0
/*
 * Performs a compaction of an append-only AOCS relation.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.
 *
 */
void
AOCSDrop(Relation aorel,
		 List *compaction_segno)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Drop AOCS relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel,
										  appendOnlyMetaDataSnapshot, &total_segfiles);

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;
		if (!list_member_int(compaction_segno, segno))
		{
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														   /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "drop skips AOCS segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		if (fsinfo->state == AOSEG_STATE_AWAITING_DROP)
		{
			Assert(HasLockForSegmentFileDrop(aorel));
			AOCSCompaction_DropSegmentFile(aorel, segno);
			ClearAOCSFileSegInfo(aorel, segno, AOSEG_STATE_DEFAULT);
		}
		pfree(fsinfo);
	}

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}
Example #13
0
void
AlterTableCreateAoVisimapTable(Oid relOid, bool is_part_child)
{
	Relation	rel;
	IndexInfo  *indexInfo;
	TupleDesc	tupdesc;
	Oid			classObjectId[2];
	int16		coloptions[2];

	elogif(Debug_appendonly_print_visimap, LOG,
		   "Create visimap for relation %d",
		   relOid);

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel))
	{
		heap_close(rel, NoLock);
		return;
	}

	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(Natts_pg_aovisimap, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "segno",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "first_row_no",
					   INT8OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "visimap",
					   BYTEAOID,
					   -1, 0);

	/*
	 * We don't want any toast columns here.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';

	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 2;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_Concurrent = false;

	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT8_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;

	(void) CreateAOAuxiliaryTable(rel,
								  "pg_aovisimap",
								  RELKIND_AOVISIMAP,
								  tupdesc, indexInfo, classObjectId, coloptions);

	heap_close(rel, NoLock);
}
static Datum
gp_aovisimap_hidden_info_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[3];
	bool		nulls[3];
	HeapTuple tuple;
	Datum result;

	typedef struct Context
	{
		AppendOnlyVisimap visiMap;

		Relation parentRelation;

		FileSegInfo **appendonlySegfileInfo;
		AOCSFileSegInfo **aocsSegfileInfo;
		int segfile_info_total;

		int i;
	} Context;
	
	FuncCallContext *funcctx;
	Context *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(3, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "hidden_tupcount",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "total_tupcount",
						   INT8OID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect all the locking information that we will format and send
		 * out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->parentRelation = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->parentRelation) || RelationIsAoCols(context->parentRelation)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		if (RelationIsAoRows(context->parentRelation))
		{
			context->appendonlySegfileInfo = GetAllFileSegInfo(
				context->parentRelation,
				SnapshotNow,
				&context->segfile_info_total);
		}
		else
		{
			Assert(RelationIsAoCols(context->parentRelation));
			context->aocsSegfileInfo = GetAllAOCSFileSegInfo(context->parentRelation,
					SnapshotNow, &context->segfile_info_total);
		}
		context->i = 0;

		AppendOnlyVisimap_Init(&context->visiMap,
				context->parentRelation->rd_appendonly->visimaprelid,
				context->parentRelation->rd_appendonly->visimapidxid,
				AccessShareLock,
				SnapshotNow);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	while (context->i < context->segfile_info_total)
	{
		int64 tupcount;
		int segno;
		if (context->appendonlySegfileInfo)
		{
			FileSegInfo *fsinfo = context->appendonlySegfileInfo[context->i];
			tupcount = fsinfo->total_tupcount;
			segno = fsinfo->segno;
		}
		else if (context->aocsSegfileInfo)
		{
			AOCSFileSegInfo *fsinfo = context->aocsSegfileInfo[context->i];
			tupcount = fsinfo->total_tupcount;
			segno = fsinfo->segno;
		}
		else
		{
			Insist(false);
		}

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = Int32GetDatum(segno);
		values[1] = Int64GetDatum(AppendOnlyVisimap_GetSegmentFileHiddenTupleCount(
					&context->visiMap, segno));
		values[2] = Int64GetDatum(tupcount);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		context->i++;
		SRF_RETURN_NEXT(funcctx, result);
	}
	
	AppendOnlyVisimap_Finish(&context->visiMap, AccessShareLock);
	if (context->appendonlySegfileInfo)
	{
		FreeAllSegFileInfo(context->appendonlySegfileInfo, context->segfile_info_total);
		pfree(context->appendonlySegfileInfo);
		context->appendonlySegfileInfo = NULL;
	}
	if (context->aocsSegfileInfo)
	{
		FreeAllAOCSSegFileInfo(context->aocsSegfileInfo, context->segfile_info_total);
		pfree(context->aocsSegfileInfo);
		context->aocsSegfileInfo = NULL;

	}
	heap_close(context->parentRelation, AccessShareLock);
	pfree(context);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
Example #15
0
/*
 * AOCSSegmentFileTruncateToEOF()
 *
 * Assumes that the segment file lock is already held.
 *
 * For the segment file is truncates to the eof.
 */
static void
AOCSSegmentFileTruncateToEOF(Relation aorel,
							 AOCSFileSegInfo *fsinfo)
{
	const char *relname = RelationGetRelationName(aorel);
	int			segno;
	int			j;

	Assert(fsinfo);
	Assert(RelationIsAoCols(aorel));

	segno = fsinfo->segno;
	relname = RelationGetRelationName(aorel);

	for (j = 0; j < fsinfo->vpinfo.nEntry; ++j)
	{
		int64		segeof;
		char		filenamepath[MAXPGPATH];
		AOCSVPInfoEntry *entry;
		File		fd;
		int32		fileSegNo;

		entry = getAOCSVPEntry(fsinfo, j);
		segeof = entry->eof;

		/* Open and truncate the relation segfile to its eof */
		MakeAOSegmentFileName(aorel, segno, j, &fileSegNo, filenamepath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Opening AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   j,
			   segno,
			   fileSegNo,
			   segeof);

		fd = OpenAOSegmentFile(aorel, filenamepath, fileSegNo, segeof);
		if (fd >= 0)
		{
			TruncateAOSegmentFile(fd, aorel, fileSegNo, segeof);
			CloseAOSegmentFile(fd);

			elogif(Debug_appendonly_print_compaction, LOG,
				   "Successfully truncated AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   aorel->rd_id,
				   aorel->rd_node.relNode,
				   j,
				   segno,
				   fileSegNo,
				   segeof);
		}
		else
		{
			elogif(Debug_appendonly_print_compaction, LOG,
				   "No gp_relation_node entry for AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   aorel->rd_id,
				   aorel->rd_node.relNode,
				   j,
				   segno,
				   fileSegNo,
				   segeof);
		}
	}
}
Example #16
0
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	if (Gp_role == GP_ROLE_DISPATCH)
		elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeSegmentFileDropPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeCleanupPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to correct value
	 * for an appendonly (AO/CO) table.
	 */
	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return;
	}

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* heap relation */

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats, vacstmt->extra_oids);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
		lazy_truncate_heap(onerel, vacrelstats);

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	/* Update statistics in pg_class */
	vac_update_relstats(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze, vacrelstats->rel_tuples);
}
Example #17
0
/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed.
 *		DELETE can be part of an update operation when
 *		there is a preceding SplitUpdate node. 
 *
 * ----------------------------------------------------------------
 */
void
ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	ResultRelInfo *resultRelInfo;
	Relation resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * Get information on the (current) result relation.
	 */
	if (estate->es_result_partitions && planGen == PLANGEN_OPTIMIZER)
	{
		Assert(estate->es_result_partitions->part->parrelid);

#ifdef USE_ASSERT_CHECKING
		Oid parent = estate->es_result_partitions->part->parrelid;
#endif

		/* Obtain part for current tuple. */
		resultRelInfo = slot_get_partition(planSlot, estate);
		estate->es_result_relation_info = resultRelInfo;

#ifdef USE_ASSERT_CHECKING
		Oid part = RelationGetRelid(resultRelInfo->ri_RelationDesc);
#endif

		Assert(parent != part);
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	Assert (!resultRelInfo->ri_projectReturning);

	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW DELETE Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
		{
			bool		dodelete;

			dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
											estate->es_snapshot->curcid);

			if (!dodelete)			/* "do nothing" */
				return;
		}
	}

	bool isHeapTable = RelationIsHeap(resultRelationDesc);
	bool isAORowsTable = RelationIsAoRows(resultRelationDesc);
	bool isAOColsTable = RelationIsAoCols(resultRelationDesc);
	bool isExternalTable = RelationIsExternal(resultRelationDesc);

	if (isExternalTable && estate->es_result_partitions && 
		estate->es_result_partitions->part->parrelid != 0)
	{
		ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			errmsg("Delete from external partitions not supported.")));			
		return;
	}
	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
ldelete:;
	if (isHeapTable)
	{
		result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	}
	else if (isAORowsTable)
	{
		if (IsXactIsoLevelSerializable)
		{
			if (!isUpdate)
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Deletes on append-only tables are not supported in serializable transactions.")));		
			else
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Updates on append-only tables are not supported in serializable transactions.")));	
		}

		if (resultRelInfo->ri_deleteDesc == NULL)
		{
			resultRelInfo->ri_deleteDesc = 
				appendonly_delete_init(resultRelationDesc, ActiveSnapshot);
		}

		AOTupleId* aoTupleId = (AOTupleId*)tupleid;
		result = appendonly_delete(resultRelInfo->ri_deleteDesc, aoTupleId);
	} 
	else if (isAOColsTable)
	{
		if (IsXactIsoLevelSerializable)
		{
			if (!isUpdate)
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Deletes on append-only tables are not supported in serializable transactions.")));		
			else
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Updates on append-only tables are not supported in serializable transactions.")));		
		}

		if (resultRelInfo->ri_deleteDesc == NULL)
		{
			resultRelInfo->ri_deleteDesc = 
				aocs_delete_init(resultRelationDesc);
		}

		AOTupleId* aoTupleId = (AOTupleId*)tupleid;
		result = aocs_delete(resultRelInfo->ri_deleteDesc, aoTupleId);
	}
	else
	{
		Insist(0);
	}
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
		
			/*
			 * In an scenario in which R(a,b) and S(a,b) have 
			 *        R               S
			 *    ________         ________
			 *     (1, 1)           (1, 2)
			 *                      (1, 7)
 			 *
   			 *  An update query such as:
 			 *   UPDATE R SET a = S.b  FROM S WHERE R.b = S.a;
 			 *   
 			 *  will have an non-deterministic output. The tuple in R 
			 * can be updated to (2,1) or (7,1).
 			 * Since the introduction of SplitUpdate, these queries will 
			 * send multiple requests to delete the same tuple. Therefore, 
			 * in order to avoid a non-deterministic output, 
			 * an error is reported in such scenario.
 			 */
			if (isUpdate)
			{

				ereport(ERROR,
					(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION ),
					errmsg("multiple updates to a row by the same query is not allowed")));
			}

			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	if (!isUpdate)
	{
		IncrDeleted();
		(estate->es_processed)++;
		/*
		 * To notify master if tuples deleted or not, to update mod_count.
		 */
		(resultRelInfo->ri_aoprocessed)++;
	}

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */


	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW DELETE Triggers */
		ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
	}
}
Example #18
0
/*
 * Performs a compaction of an append-only relation in column-orientation.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code. If
 * set, the insert_segno should also be marked as in-use.
  * When the insert segno is negative, only truncate to eof operations
 * can be executed.
 *
 * The caller is required to hold either an AccessExclusiveLock (vacuum full)
 * or a ShareLock on the relation.
 */
void
AOCSCompact(Relation aorel,
			List *compaction_segno,
			int insert_segno,
			bool isFull)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	AOCSInsertDesc insertDesc = NULL;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(RelationIsAoCols(aorel));
	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(insert_segno >= 0);

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, &total_segfiles);

	if (insert_segno >= 0)
	{
		insertDesc = aocs_insert_init(aorel, insert_segno, false);
	}

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;
		if (!list_member_int(compaction_segno, segno))
		{
			continue;
		}
		if (segno == insert_segno)
		{
			/* We cannot compact the segment file we are inserting to. */
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														   /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "compaction skips AOCS segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		if (AppendOnlyCompaction_ShouldCompact(aorel,
											   fsinfo->segno, fsinfo->total_tupcount, isFull,
											   appendOnlyMetaDataSnapshot))
		{
			AOCSSegmentFileFullCompaction(aorel, insertDesc, fsinfo,
										  appendOnlyMetaDataSnapshot);
		}

		pfree(fsinfo);
	}

	if (insertDesc != NULL)
		aocs_insert_finish(insertDesc);

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}

	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}
Example #19
0
/*
 * Assumes that the segment file lock is already held.
 * Assumes that the segment file should be compacted.
 */
static bool
AOCSSegmentFileFullCompaction(Relation aorel,
							  AOCSInsertDesc insertDesc,
							  AOCSFileSegInfo *fsinfo,
							  Snapshot snapshot)
{
	const char *relname;
	AppendOnlyVisimap visiMap;
	AOCSScanDesc scanDesc;
	TupleDesc	tupDesc;
	TupleTableSlot *slot;
	int			compact_segno;
	int64		movedTupleCount = 0;
	ResultRelInfo *resultRelInfo;
	MemTupleBinding *mt_bind;
	EState	   *estate;
	bool	   *proj;
	int			i;
	AOTupleId  *aoTupleId;
	int64		tupleCount = 0;
	int64		tuplePerPage = INT_MAX;

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoCols(aorel));
	Assert(insertDesc);

	compact_segno = fsinfo->segno;
	if (fsinfo->varblockcount > 0)
	{
		tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount;
	}
	relname = RelationGetRelationName(aorel);

	AppendOnlyVisimap_Init(&visiMap,
						   aorel->rd_appendonly->visimaprelid,
						   aorel->rd_appendonly->visimapidxid,
						   ShareLock,
						   snapshot);

	elogif(Debug_appendonly_print_compaction,
		   LOG, "Compact AO segfile %d, relation %sd",
		   compact_segno, relname);

	proj = palloc0(sizeof(bool) * RelationGetNumberOfAttributes(aorel));
	for (i = 0; i < RelationGetNumberOfAttributes(aorel); ++i)
	{
		proj[i] = true;
	}
	scanDesc = aocs_beginrangescan(aorel,
								   snapshot, snapshot,
								   &compact_segno, 1, NULL, proj);

	tupDesc = RelationGetDescr(aorel);
	slot = MakeSingleTupleTableSlot(tupDesc);
	mt_bind = create_memtuple_binding(tupDesc);

	/*
	 * We need a ResultRelInfo and an EState so we can use the regular
	 * executor's index-entry-making machinery.
	 */
	estate = CreateExecutorState();
	resultRelInfo = makeNode(ResultRelInfo);
	resultRelInfo->ri_RangeTableIndex = 1;	/* dummy */
	resultRelInfo->ri_RelationDesc = aorel;
	resultRelInfo->ri_TrigDesc = NULL;	/* we don't fire triggers */
	ExecOpenIndices(resultRelInfo);
	estate->es_result_relations = resultRelInfo;
	estate->es_num_result_relations = 1;
	estate->es_result_relation_info = resultRelInfo;

	while (aocs_getnext(scanDesc, ForwardScanDirection, slot))
	{
		CHECK_FOR_INTERRUPTS();

		aoTupleId = (AOTupleId *) slot_get_ctid(slot);
		if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId))
		{
			AOCSMoveTuple(slot,
						  insertDesc,
						  resultRelInfo,
						  estate);
			movedTupleCount++;
		}
		else
		{
			/* Tuple is invisible and needs to be dropped */
			AppendOnlyThrowAwayTuple(aorel,
									 slot,
									 mt_bind);
		}

		/*
		 * Check for vacuum delay point after approximatly a var block
		 */
		tupleCount++;
		if (VacuumCostActive && tupleCount % tuplePerPage == 0)
		{
			vacuum_delay_point();
		}
	}

	SetAOCSFileSegInfoState(aorel, compact_segno,
							AOSEG_STATE_AWAITING_DROP);

	AppendOnlyVisimap_DeleteSegmentFile(&visiMap,
										compact_segno);

	/* Delete all mini pages of the segment files if block directory exists */
	if (OidIsValid(aorel->rd_appendonly->blkdirrelid))
	{
		AppendOnlyBlockDirectory_DeleteSegmentFile(aorel,
												   snapshot,
												   compact_segno,
												   0);
	}

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Finished compaction: "
		   "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT,
		   compact_segno, relname, movedTupleCount);

	AppendOnlyVisimap_Finish(&visiMap, NoLock);

	ExecCloseIndices(resultRelInfo);
	FreeExecutorState(estate);

	ExecDropSingleTupleTableSlot(slot);
	destroy_memtuple_binding(mt_bind);

	aocs_endscan(scanDesc);
	pfree(proj);

	return true;
}
Example #20
0
/*
 *	vacuum_appendonly_rel() -- vaccum an append-only relation
 *
 *		This procedure will be what gets executed both for VACUUM
 *		and VACUUM FULL (and also ANALYZE or any other thing that
 *		needs the pg_class stats updated).
 *
 *		The function can compact append-only segment files or just
 *		truncating the segment file to its existing eof.
 *
 *		Afterwards, the reltuples and relpages information in pg_class
 *		are updated. reltuples is the same as "pg_aoseg_<oid>:tupcount"
 *		column and we simulate relpages by subdividing the eof value
 *		("pg_aoseg_<oid>:eof") over the defined page size.
 *
 *
 *		There are txn ids, hint bits, free space, dead tuples,
 *		etc. these are all irrelevant in the append only relation context.
 *
 */
void
vacuum_appendonly_rel(Relation aorel, VacuumStmt *vacstmt)
{
	char	   *relname;
	PGRUsage	ru0;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));
	Assert(!vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt));

	pg_rusage_init(&ru0);
	relname = RelationGetRelationName(aorel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(aorel)),
					relname)));

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		return;
	}
	Assert(list_length(vacstmt->appendonly_compaction_insert_segno) <= 1);
	if (vacstmt->appendonly_compaction_insert_segno == NULL)
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			"Vacuum drop phase %s", RelationGetRelationName(aorel));

		if (RelationIsAoRows(aorel))
		{
			AppendOnlyDrop(aorel, vacstmt->appendonly_compaction_segno);
		}
		else
		{
			Assert(RelationIsAoCols(aorel));
			AOCSDrop(aorel, vacstmt->appendonly_compaction_segno);
		}
	}
	else
	{
		int insert_segno = linitial_int(vacstmt->appendonly_compaction_insert_segno);
		if (insert_segno == APPENDONLY_COMPACTION_SEGNO_INVALID)
		{
			elogif(Debug_appendonly_print_compaction, LOG,
			"Vacuum pseudo-compaction phase %s", RelationGetRelationName(aorel));
		}
		else
		{
			elogif(Debug_appendonly_print_compaction, LOG,
				"Vacuum compaction phase %s", RelationGetRelationName(aorel));
			if (RelationIsAoRows(aorel))
			{
				AppendOnlyCompact(aorel,
					vacstmt->appendonly_compaction_segno,
					insert_segno, vacstmt->full);
			}
			else
			{
				Assert(RelationIsAoCols(aorel));
				AOCSCompact(aorel,
					vacstmt->appendonly_compaction_segno,
					insert_segno, vacstmt->full);
			}
		}
	}
}
Example #21
0
/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
 * ----------------------------------------------------------------
 */
void
ExecUpdate(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	void*	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;
	AOTupleId	aoTupleId = AOTUPLEID_INIT;
	TupleTableSlot *partslot = NULL;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");
	
	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	bool		rel_is_heap = RelationIsHeap(resultRelationDesc);
	bool 		rel_is_aorows = RelationIsAoRows(resultRelationDesc);
	bool		rel_is_aocols = RelationIsAoCols(resultRelationDesc);
	bool		rel_is_external = RelationIsExternal(resultRelationDesc);

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	if (rel_is_heap)
	{
		partslot = slot;
		tuple = ExecFetchSlotHeapTuple(partslot);
	}
	else if (rel_is_aorows || rel_is_aocols)
	{
		/*
		 * It is necessary to reconstruct a logically compatible tuple to
		 * a phyiscally compatible tuple.  The slot's tuple descriptor comes
		 * from the projection target list, which doesn't indicate dropped
		 * columns, and MemTuple cannot deal with cases without converting
		 * the target list back into the original relation's tuple desc.
		 */
		partslot = reconstructMatchingTupleSlot(slot, resultRelInfo);

		/*
		 * We directly inline toasted columns here as update with toasted columns
		 * would create two references to the same toasted value.
		 */
		tuple = ExecFetchSlotMemTuple(partslot, true);
	}
	else if (rel_is_external) 
	{
		if (estate->es_result_partitions && 
			estate->es_result_partitions->part->parrelid != 0)
		{
			ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				errmsg("Update external partitions not supported.")));			
			return;
		}
		else
		{
			partslot = slot;
			tuple = ExecFetchSlotHeapTuple(partslot);
		}
	}
	else 
	{
		Insist(false);
	}

	/* see if this update would move the tuple to a different partition */
	if (estate->es_result_partitions)
		checkPartitionUpdate(estate, partslot, resultRelInfo);

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
										tupleid, tuple,
										estate->es_snapshot->curcid);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor);
			ExecStoreGenericTuple(newtuple, newslot, false);
            newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */
			partslot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 *
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
	 */
lreplace:;
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, partslot, estate);

	if (!GpPersistent_IsPersistentRelation(resultRelationDesc->rd_id))
	{
		/*
		 * Normal UPDATE path.
		 */

		/*
		 * replace the heap tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
		 * the row to be updated is visible to that snapshot, and throw a can't-
		 * serialize error if not.	This is a special-case behavior needed for
		 * referential integrity updates in serializable transactions.
		 */
		if (rel_is_heap)
		{
			result = heap_update(resultRelationDesc, tupleid, tuple,
							 &update_ctid, &update_xmax,
							 estate->es_snapshot->curcid,
							 estate->es_crosscheck_snapshot,
							 true /* wait for commit */ );
		} 
		else if (rel_is_aorows)
		{
			if (IsXactIsoLevelSerializable)
			{
				ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					errmsg("Updates on append-only tables are not supported in serializable transactions.")));			
			}

			if (resultRelInfo->ri_updateDesc == NULL)
			{
				ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);
				resultRelInfo->ri_updateDesc = (AppendOnlyUpdateDesc)
					appendonly_update_init(resultRelationDesc, ActiveSnapshot, resultRelInfo->ri_aosegno);
			}
			result = appendonly_update(resultRelInfo->ri_updateDesc,
								 tuple, (AOTupleId *) tupleid, &aoTupleId);
		}
		else if (rel_is_aocols)
		{
			if (IsXactIsoLevelSerializable)
			{
				ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					errmsg("Updates on append-only tables are not supported in serializable transactions.")));			
			}

			if (resultRelInfo->ri_updateDesc == NULL)
			{
				ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);
				resultRelInfo->ri_updateDesc = (AppendOnlyUpdateDesc)
					aocs_update_init(resultRelationDesc, resultRelInfo->ri_aosegno);
			}
			result = aocs_update(resultRelInfo->ri_updateDesc,
								 partslot, (AOTupleId *) tupleid, &aoTupleId);
		}
		else
		{
			Assert(!"We should not be here");
		}
		switch (result)
		{
			case HeapTupleSelfUpdated:
				/* already deleted by self; nothing to do */
				return;

			case HeapTupleMayBeUpdated:
				break;

			case HeapTupleUpdated:
				if (IsXactIsoLevelSerializable)
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent update")));
				else if (!ItemPointerEquals(tupleid, &update_ctid))
				{
					TupleTableSlot *epqslot;

					epqslot = EvalPlanQual(estate,
										   resultRelInfo->ri_RangeTableIndex,
										   &update_ctid,
										   update_xmax,
										   estate->es_snapshot->curcid);
					if (!TupIsNull(epqslot))
					{
						*tupleid = update_ctid;
						partslot = ExecFilterJunk(estate->es_junkFilter, epqslot);
						tuple = ExecFetchSlotHeapTuple(partslot);
						goto lreplace;
					}
				}
				/* tuple already deleted; nothing to do */
				return;

			default:
				elog(ERROR, "unrecognized heap_update status: %u", result);
				return;
		}
	}
	else
	{
		HeapTuple persistentTuple;

		/*
		 * Persistent metadata path.
		 */
		persistentTuple = heap_copytuple(tuple);
		persistentTuple->t_self = *tupleid;

		frozen_heap_inplace_update(resultRelationDesc, persistentTuple);

		heap_freetuple(persistentTuple);
	}

	IncrReplaced();
	(estate->es_processed)++;
	(resultRelInfo->ri_aoprocessed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).	All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */
	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	if (rel_is_aorows || rel_is_aocols)
	{
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false);
	}
	else
	{
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(partslot, &(((HeapTuple) tuple)->t_self), estate, false);
	}

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);

}
Example #22
0
/*
 * Create append-only auxiliary relations for target relation rel.
 * Returns true if they are newly created.  If pg_appendonly has already
 * known those tables, don't create them and returns false.
 */
bool
CreateAOAuxiliaryTable(
		Relation rel,
		const char *auxiliaryNamePrefix,
		char relkind,
		TupleDesc tupledesc,
		IndexInfo  *indexInfo,
		Oid	*classObjectId,
		int16 *coloptions)
{
	char aoauxiliary_relname[NAMEDATALEN];
	char aoauxiliary_idxname[NAMEDATALEN];
	bool shared_relation;
	Oid relOid, aoauxiliary_relid = InvalidOid;
	Oid aoauxiliary_idxid = InvalidOid;
	ObjectAddress baseobject;
	ObjectAddress aoauxiliaryobject;

	Assert(RelationIsValid(rel));
	Assert(RelationIsAoRows(rel) || RelationIsAoCols(rel));
	Assert(auxiliaryNamePrefix);
	Assert(tupledesc);
	Assert(classObjectId);
	if (relkind != RELKIND_AOSEGMENTS)
		Assert(indexInfo);

	shared_relation = rel->rd_rel->relisshared;
	/*
	 * We cannot allow creating an auxiliary table for a shared relation
	 * after initdb (because there's no way to let other databases know
	 * this visibility map.
	 */
	if (shared_relation && !IsBootstrapProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared tables cannot have append-only auxiliary relations after initdb")));

	relOid = RelationGetRelid(rel);

	switch(relkind)
	{
		case RELKIND_AOVISIMAP:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL,
				NULL, NULL, &aoauxiliary_relid, &aoauxiliary_idxid);
			break;
		case RELKIND_AOBLOCKDIR:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL,
				&aoauxiliary_relid, &aoauxiliary_idxid, NULL, NULL);
			break;
		case RELKIND_AOSEGMENTS:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow,
				&aoauxiliary_relid,
				NULL, NULL, NULL, NULL);
			break;
		default:
			elog(ERROR, "unsupported auxiliary relkind '%c'", relkind);
	}

	/*
	 * Does it have the auxiliary relation?
	 */
	if (OidIsValid(aoauxiliary_relid))
	{
		return false;
	}

	snprintf(aoauxiliary_relname, sizeof(aoauxiliary_relname),
			 "%s_%u", auxiliaryNamePrefix, relOid);
	snprintf(aoauxiliary_idxname, sizeof(aoauxiliary_idxname),
			 "%s_%u_index", auxiliaryNamePrefix, relOid);

	/*
	 * We place auxiliary relation in the pg_aoseg namespace
	 * even if its master relation is a temp table. There cannot be
	 * any naming collision, and the auxiliary relation will be
	 * destroyed when its master is, so there is no need to handle
	 * the aovisimap relation as temp.
	 */
	aoauxiliary_relid = heap_create_with_catalog(aoauxiliary_relname,
											     PG_AOSEGMENT_NAMESPACE,
											     rel->rd_rel->reltablespace,
											     InvalidOid,
											     rel->rd_rel->relowner,
											     tupledesc,
											     /* relam */ InvalidOid,
											     relkind,
											     RELSTORAGE_HEAP,
											     shared_relation,
											     true,
											     /* bufferPoolBulkLoad */ false,
											     0,
											     ONCOMMIT_NOOP,
											     NULL, /* GP Policy */
											     (Datum) 0,
											     true,
												 /* valid_opts */ false,
											     /* persistentTid */ NULL,
											     /* persistentSerialNum */ NULL);

	/* Make this table visible, else index creation will fail */
	CommandCounterIncrement();

	/* Create an index on AO auxiliary tables (like visimap) except for pg_aoseg table */
	if (relkind != RELKIND_AOSEGMENTS)
	{
		aoauxiliary_idxid = index_create(aoauxiliary_relid,
										 aoauxiliary_idxname,
										 InvalidOid,
										 indexInfo,
										 BTREE_AM_OID,
										 rel->rd_rel->reltablespace,
										 classObjectId, coloptions, (Datum) 0,
										 true, false, true, false,
										 false, NULL);

		/* Unlock target table -- no one can see it */
		UnlockRelationOid(aoauxiliary_relid, ShareLock);

		/* Unlock the index -- no one can see it anyway */
		UnlockRelationOid(aoauxiliary_idxid, AccessExclusiveLock);
	}

	/*
	 * Store the auxiliary table's OID in the parent relation's pg_appendonly row.
	 * TODO (How to generalize this?)
	 */
	switch (relkind)
	{
		case RELKIND_AOVISIMAP:
			UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid,
								 InvalidOid, InvalidOid,
								 aoauxiliary_relid, aoauxiliary_idxid);
			break;
		case RELKIND_AOBLOCKDIR:
			UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid,
								 aoauxiliary_relid, aoauxiliary_idxid,
								 InvalidOid, InvalidOid);
			break;
		case RELKIND_AOSEGMENTS:
			UpdateAppendOnlyEntryAuxOids(relOid,
								 aoauxiliary_relid,
								 InvalidOid, InvalidOid,
								 InvalidOid, InvalidOid);
			break;
		default:
			elog(ERROR, "unsupported auxiliary relkind '%c'", relkind);
	}

	/*
	 * Register dependency from the auxiliary table to the master, so that the
	 * aoseg table will be deleted if the master is.
	 */
	baseobject.classId = RelationRelationId;
	baseobject.objectId = relOid;
	baseobject.objectSubId = 0;
	aoauxiliaryobject.classId = RelationRelationId;
	aoauxiliaryobject.objectId = aoauxiliary_relid;
	aoauxiliaryobject.objectSubId = 0;

	recordDependencyOn(&aoauxiliaryobject, &baseobject, DEPENDENCY_INTERNAL);

	/*
	 * Make changes visible
	 */
	CommandCounterIncrement();

	return true;
}
static Datum
gp_aovisimap_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[3];
	bool		nulls[3];
	HeapTuple tuple;
	Datum result;

	typedef struct Context
	{
		Relation aorel;
		AppendOnlyVisimapScan visiMapScan;
		AOTupleId aoTupleId;
		
	} Context;
	
	FuncCallContext *funcctx;
	Context *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;
		
		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(3, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tid",
						   TIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "row_num",
						   INT8OID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect all the locking information that we will format and send
		 * out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->aorel = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->aorel) || RelationIsAoCols(context->aorel)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		AppendOnlyVisimapScan_Init(&context->visiMapScan,
				context->aorel->rd_appendonly->visimaprelid,
				context->aorel->rd_appendonly->visimapidxid,
				AccessShareLock,
				SnapshotNow);
		AOTupleIdInit_Init(&context->aoTupleId);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	while (true)
	{
		if (!AppendOnlyVisimapScan_GetNextInvisible(
				&context->visiMapScan,
				&context->aoTupleId))
		{
			break;
		}
		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = ItemPointerGetDatum((ItemPointer)&context->aoTupleId);
		values[1] = Int32GetDatum(AOTupleIdGet_segmentFileNum(&context->aoTupleId));
		values[2] = Int64GetDatum(AOTupleIdGet_rowNum(&context->aoTupleId));

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	
	AppendOnlyVisimapScan_Finish(&context->visiMapScan, AccessShareLock);
	heap_close(context->aorel, AccessShareLock);
	pfree(context);
	funcctx->user_fctx = NULL;
	SRF_RETURN_DONE(funcctx);
}
Example #24
0
/*
 * Returns true iff the given segment file should be compacted.
 */
bool
AppendOnlyCompaction_ShouldCompact(
	Relation aoRelation,
	AppendOnlyEntry *aoEntry,
	int segno,
	int64 segmentTotalTupcount,
	bool isFull)
{
	bool result;
	AppendOnlyVisimap visiMap;
	int64 hiddenTupcount;
	int hideRatio;

	Assert(RelationIsAoRows(aoRelation) || RelationIsAoCols(aoRelation));

	if (!gp_appendonly_compaction)
	{
		ereport(LOG,
			(errmsg("Append-only compaction skipped on relation %s, segment file num %d",
				RelationGetRelationName(aoRelation),
				segno),
			 errdetail("Compaction is disabled")));
		/* Disable compaction by global guc. */
		return false;
	}

	AppendOnlyVisimap_Init(&visiMap,
			aoEntry->visimaprelid,
			aoEntry->visimapidxid,
			ShareLock,
			SnapshotNow);
	hiddenTupcount = AppendOnlyVisimap_GetSegmentFileHiddenTupleCount(
			&visiMap, segno);

	result = true;
	if (isFull && hiddenTupcount > 0)
	{
		/* 
		 * if it is a full vacuum and there is any obsolete data, do a compaction
		 */
		result = true;
	}
	else
	{
	hideRatio = AppendOnlyCompaction_GetHideRatio(hiddenTupcount, segmentTotalTupcount);
	if (hideRatio <= gp_appendonly_compaction_threshold || gp_appendonly_compaction_threshold == 0)
	{
			if (hiddenTupcount > 0)
			{
				ereportif(Debug_appendonly_print_compaction, LOG, 
					(errmsg("Append-only compaction skipped on relation %s, segment file num %d, "
					"hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " 
					"hide ratio %d%%, threshold %d%%",
					RelationGetRelationName(aoRelation),
					segno,
					hiddenTupcount, segmentTotalTupcount, 
					hideRatio, gp_appendonly_compaction_threshold)));
				ereport(LOG,
					(errmsg("Append-only compaction skipped on relation %s, segment file num %d",
					RelationGetRelationName(aoRelation),
					segno),
					errdetail("Ratio of obsolete tuples below threshold (%d%% vs %d%%)",
						hideRatio, gp_appendonly_compaction_threshold)));
			}
			else
			{
				ereportif(Debug_appendonly_print_compaction, LOG, 
					(errmsg("Append-only compaction skipped on relation %s, segment file num %d, "
					"hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " 
					"hide ratio %d%%, threshold %d%%",
					RelationGetRelationName(aoRelation),
					segno,
					hiddenTupcount, segmentTotalTupcount, 
					hideRatio, gp_appendonly_compaction_threshold)));
			}
			result = false;
		}
		elogif(Debug_appendonly_print_compaction, LOG, 
			"Schedule compaction: "
			"segno %d, "
			"hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " 
			"hide ratio %d%%, threshold %d%%",
			segno,
			hiddenTupcount, segmentTotalTupcount, 
			hideRatio, gp_appendonly_compaction_threshold);
	}
	AppendOnlyVisimap_Finish(&visiMap, ShareLock);
	return result;
}
static Datum
gp_aovisimap_entry_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[4];
	bool		nulls[4];
	HeapTuple tuple;
	Datum result;

	typedef struct Context
	{
		AppendOnlyVisimap visiMap;

		Relation parentRelation;

		IndexScanDesc indexScan;

		text *bitmapBuffer;
	} Context;
	
	FuncCallContext *funcctx;
	Context *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;
		
		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(4, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "first_row_num",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "hidden_tupcount",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "bitmap",
						   TEXTOID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect all the locking information that we will format and send
		 * out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->parentRelation = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->parentRelation) || RelationIsAoCols(context->parentRelation)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		AppendOnlyVisimap_Init(&context->visiMap,
				context->parentRelation->rd_appendonly->visimaprelid,
				context->parentRelation->rd_appendonly->visimapidxid,
				AccessShareLock,
				SnapshotNow);

		context->indexScan = AppendOnlyVisimapStore_BeginScan(&
			context->visiMap.visimapStore, 0, NULL);

		context->bitmapBuffer = palloc0(VARHDRSZ + APPENDONLY_VISIMAP_MAX_RANGE + 1);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	if (AppendOnlyVisimapStore_GetNext(&context->visiMap.visimapStore,
				context->indexScan,
				ForwardScanDirection,
				&context->visiMap.visimapEntry,
				NULL))
	{
		AppendOnlyVisimapEntry *visimapEntry = &context->visiMap.visimapEntry;

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = Int32GetDatum(visimapEntry->segmentFileNum);
		values[1] = Int64GetDatum(visimapEntry->firstRowNum);
		values[2] = Int32GetDatum(
				(int32)AppendOnlyVisimapEntry_GetHiddenTupleCount(visimapEntry));
		
		gp_aovisimap_encode_bitmap(VARDATA(context->bitmapBuffer), 
				visimapEntry->bitmap);
		SET_VARSIZE(context->bitmapBuffer, APPENDONLY_VISIMAP_MAX_RANGE);
		values[3] = PointerGetDatum(context->bitmapBuffer);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	
	AppendOnlyVisimapStore_EndScan(&context->visiMap.visimapStore,
			context->indexScan);
	AppendOnlyVisimap_Finish(&context->visiMap, AccessShareLock);
	heap_close(context->parentRelation, AccessShareLock);

	pfree(context->bitmapBuffer);
	pfree(context);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
Example #26
0
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 *
 *		The return value indicates whether this function has held off
 *		interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
 */
bool
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	bool		heldoff = false;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	if (Gp_role == GP_ROLE_DISPATCH)
		elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeSegmentFileDropPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeCleanupPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to correct value
	 * for an appendonly (AO/CO) table.
	 */
	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return false;
	}

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* heap relation */

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 *
	 * Note that after we've truncated the heap, it's too late to abort the
	 * transaction; doing so would lose the sinval messages needed to tell
	 * the other backends about the table being shrunk.  We prevent interrupts
	 * in that case; caller is responsible for re-enabling them after
	 * committing the transaction.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
	{
		HOLD_INTERRUPTS();
		heldoff = true;
		lazy_truncate_heap(onerel, vacrelstats);
	}

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	if (vacrelstats->tot_free_pages > MaxFSMPages)
		ereport(WARNING,
				(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel)),
				 /* Only suggest VACUUM FULL if > 20% free */
				 (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
				 errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
				 errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));

	/* Update statistics in pg_class */
	vac_update_relstats_from_list(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze, vacrelstats->rel_tuples);

	if (gp_indexcheck_vacuum == INDEX_CHECK_ALL ||
		(gp_indexcheck_vacuum == INDEX_CHECK_SYSTEM &&
		 PG_CATALOG_NAMESPACE == RelationGetNamespace(onerel)))
	{
		int			i;

		for (i = 0; i < nindexes; i++)
		{
			if (Irel[i]->rd_rel->relam == BTREE_AM_OID)
				_bt_validate_vacuum(Irel[i], onerel, OldestXmin);
		}
	}

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
						  vacrelstats->pages_removed, vacrelstats->rel_pages,
						vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}

	return heldoff;
}
Example #27
0
/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs have to add the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 *		Insert can be part of an update operation when
 *		there is a preceding SplitUpdate node. 
 * ----------------------------------------------------------------
 */
void
ExecInsert(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	void		*tuple = NULL;
	ResultRelInfo *resultRelInfo = NULL;
	Relation	resultRelationDesc = NULL;
	Oid			newId = InvalidOid;
	TupleTableSlot *partslot = NULL;

	AOTupleId	aoTupleId = AOTUPLEID_INIT;

	bool		rel_is_heap = false;
	bool 		rel_is_aorows = false;
	bool		rel_is_aocols = false;
	bool		rel_is_external = false;

	/*
	 * get information on the (current) result relation
	 */
	if (estate->es_result_partitions)
	{
		resultRelInfo = slot_get_partition(slot, estate);

		/* Check whether the user provided the correct leaf part only if required */
		if (!dml_ignore_target_partition_check)
		{
			Assert(NULL != estate->es_result_partitions->part &&
					NULL != resultRelInfo->ri_RelationDesc);

			List *resultRelations = estate->es_plannedstmt->resultRelations;
			/*
			 * Only inheritance can generate multiple result relations and inheritance
			 * is not compatible with partitions. As we are in inserting in partitioned
			 * table, we should not have more than one resultRelation
			 */
			Assert(list_length(resultRelations) == 1);
			/* We only have one resultRelations entry where the user originally intended to insert */
			int rteIdxForUserRel = linitial_int(resultRelations);
			Assert (rteIdxForUserRel > 0);
			Oid userProvidedRel = InvalidOid;

			if (1 == rteIdxForUserRel)
			{
				/* Optimization for typical case */
				userProvidedRel = ((RangeTblEntry *) estate->es_plannedstmt->rtable->head->data.ptr_value)->relid;
			}
			else
			{
				userProvidedRel = getrelid(rteIdxForUserRel, estate->es_plannedstmt->rtable);
			}

			/* Error out if user provides a leaf partition that does not match with our calculated partition */
			if (userProvidedRel != estate->es_result_partitions->part->parrelid &&
				userProvidedRel != resultRelInfo->ri_RelationDesc->rd_id)
			{
				ereport(ERROR,
						(errcode(ERRCODE_CHECK_VIOLATION),
						 errmsg("Trying to insert row into wrong partition"),
						 errdetail("Expected partition: %s, provided partition: %s",
							resultRelInfo->ri_RelationDesc->rd_rel->relname.data,
							estate->es_result_relation_info->ri_RelationDesc->rd_rel->relname.data)));
			}
		}
		estate->es_result_relation_info = resultRelInfo;
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}

	Assert (!resultRelInfo->ri_projectReturning);

	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	rel_is_heap = RelationIsHeap(resultRelationDesc);
	rel_is_aocols = RelationIsAoCols(resultRelationDesc);
	rel_is_aorows = RelationIsAoRows(resultRelationDesc);
	rel_is_external = RelationIsExternal(resultRelationDesc);

	partslot = reconstructMatchingTupleSlot(slot, resultRelInfo);
	if (rel_is_heap)
	{
		tuple = ExecFetchSlotHeapTuple(partslot);
	}
	else if (rel_is_aorows)
	{
		tuple = ExecFetchSlotMemTuple(partslot, false);
	}
	else if (rel_is_external) 
	{
		if (estate->es_result_partitions && 
			estate->es_result_partitions->part->parrelid != 0)
		{
			ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				errmsg("Insert into external partitions not supported.")));			
			return;
		}
		else
		{
			tuple = ExecFetchSlotHeapTuple(partslot);
		}
	}
	else
	{
		Assert(rel_is_aocols);
		tuple = ExecFetchSlotMemTuple(partslot, true);
	}

	Assert(partslot != NULL && tuple != NULL);

	/* Execute triggers in Planner-generated plans */
	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW INSERT Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
		{
			HeapTuple	newtuple;

			/* NYI */
			if(rel_is_aocols)
				elog(ERROR, "triggers are not supported on tables that use column-oriented storage");

			newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

			if (newtuple == NULL)	/* "do nothing" */
			{
				return;
			}

			if (newtuple != tuple)	/* modified by Trigger(s) */
			{
				/*
				 * Put the modified tuple into a slot for convenience of routines
				 * below.  We assume the tuple was allocated in per-tuple memory
				 * context, and therefore will go away by itself. The tuple table
				 * slot should not try to clear it.
				 */
				TupleTableSlot *newslot = estate->es_trig_tuple_slot;

				if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor)
					ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor);
				ExecStoreGenericTuple(newtuple, newslot, false);
				newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */
				tuple = newtuple;
				partslot = newslot;
			}
		}
	}
	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr &&
			planGen == PLANGEN_PLANNER)
	{
		ExecConstraints(resultRelInfo, partslot, estate);
	}
	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * NOTE: for append-only relations we use the append-only access methods.
	 */
	if (rel_is_aorows)
	{
		if (resultRelInfo->ri_aoInsertDesc == NULL)
		{
			/* Set the pre-assigned fileseg number to insert into */
			ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);

			resultRelInfo->ri_aoInsertDesc =
				appendonly_insert_init(resultRelationDesc,
									   ActiveSnapshot,
									   resultRelInfo->ri_aosegno,
									   false);

		}

		appendonly_insert(resultRelInfo->ri_aoInsertDesc, tuple, &newId, &aoTupleId);
	}
	else if (rel_is_aocols)
	{
		if (resultRelInfo->ri_aocsInsertDesc == NULL)
		{
			ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);
			resultRelInfo->ri_aocsInsertDesc = aocs_insert_init(resultRelationDesc, 
																resultRelInfo->ri_aosegno, false);
		}

		newId = aocs_insert(resultRelInfo->ri_aocsInsertDesc, partslot);
		aoTupleId = *((AOTupleId*)slot_get_ctid(partslot));
	}
	else if (rel_is_external)
	{
		/* Writable external table */
		if (resultRelInfo->ri_extInsertDesc == NULL)
			resultRelInfo->ri_extInsertDesc = external_insert_init(resultRelationDesc);

		newId = external_insert(resultRelInfo->ri_extInsertDesc, tuple);
	}
	else
	{
		Insist(rel_is_heap);

		newId = heap_insert(resultRelationDesc,
							tuple,
							estate->es_snapshot->curcid,
							true, true, GetCurrentTransactionId());
	}

	IncrAppended();
	(estate->es_processed)++;
	(resultRelInfo->ri_aoprocessed)++;
	estate->es_lastoid = newId;

	partslot->tts_tableOid = RelationGetRelid(resultRelationDesc);

	if (rel_is_aorows || rel_is_aocols)
	{
		/*
		 * insert index entries for AO Row-Store tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false);
	}
	else
	{
		/* Use parttuple for index update in case this is an indexed heap table. */
		TupleTableSlot *xslot = partslot;
		void *xtuple = tuple;

		setLastTid(&(((HeapTuple) xtuple)->t_self));

		/*
		 * insert index entries for tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(xslot, &(((HeapTuple) xtuple)->t_self), estate, false);

	}

	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW INSERT Triggers */
		ExecARInsertTriggers(estate, resultRelInfo, tuple);
	}
}
Example #28
0
void
AlterTableCreateAoSegTable(Oid relOid, bool is_part_child, bool is_part_parent)
{
	TupleDesc	tupdesc;
	Relation	rel;
	const char *prefix;

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	if(RelationIsAoRows(rel))
	{
		prefix = "pg_aoseg";

		/* this is pretty painful...  need a tuple descriptor */
		tupdesc = CreateTemplateTupleDesc(8, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1,
						"segno",
						INT4OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2,
						"eof",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3,
						"tupcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4,
						"varblockcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5,
						"eofuncompressed",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6,
						"modcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 7,
						"formatversion",
						INT2OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 8,
						"state",
						INT2OID,
						-1, 0);

	}
	else if (RelationIsAoCols(rel))
	{
		prefix = "pg_aocsseg";

		/*
		 * XXX
		 * At this moment, we hardwire the rel aocs info.
		 * Essentially, we assume total vertical partition, and
		 * we do not do datatype specific compression.
		 *
		 * In order to make things right, we need to first fix
		 * the DefineRelation, so that we store the per column
		 * info, then, we need to open the catalog, pull out
		 * info here.
		 */

		/*
		 * XXX We do not handle add/drop column etc nicely yet.
		 */

		/*
		 * Assuming full vertical partition, we want to include
		 * the following in the seg table.
		 *
		 * segno int,               -- whatever purpose ao use it
		 * tupcount bigint          -- total tup
		 * varblockcount bigint,    -- total varblock
		 * vpinfo varbinary(max)    -- vertical partition info encoded in 
		 *                             binary. NEEDS TO BE REFACTORED
		 *                             INTO MULTIPLE COLUMNS!!
		 * state (smallint)         -- state of the segment file
		 */

		tupdesc = CreateTemplateTupleDesc(7, false);

		TupleDescInitEntry(tupdesc, (AttrNumber) 1,
						   "segno",
						   INT4OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2,
						   "tupcount",
						   INT8OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3,
						   "varblockcount",
						   INT8OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4,
						   "vpinfo",
						   BYTEAOID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5,
						"modcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6,
						   "formatversion",
						   INT2OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 7,
						   "state",
						   INT2OID,
						   -1, 0);
	}
	else
	{
		heap_close(rel, NoLock);
		return;
	}

	(void) CreateAOAuxiliaryTable(rel, prefix, RELKIND_AOSEGMENTS,
								  tupdesc,
								  NULL, NIL, NULL, NULL, is_part_parent);

	heap_close(rel, NoLock);
}
Example #29
0
/*
 * Truncates each segment file to the AOCS relation to its EOF.
 * If we cannot get a lock on the segment file (because e.g. a concurrent insert)
 * the segment file is skipped.
 */
void
AOCSTruncateToEOF(Relation aorel)
{
	const char *relname;
	int			total_segfiles;
	AOCSFileSegInfo **segfile_array;
	int			i,
				segno;
	LockAcquireResult acquireResult;
	AOCSFileSegInfo *fsinfo;
	Snapshot	appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));

	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, &total_segfiles);

	for (i = 0; i < total_segfiles; i++)
	{
		segno = segfile_array[i]->segno;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment
		 * file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until
		 * commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
														  &aorel->rd_node,
														  segfile_array[i]->segno,
														  AccessExclusiveLock,
														   /* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "truncate skips AO segfile %d, "
				 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetAOCSFileSegInfo(aorel, appendOnlyMetaDataSnapshot, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but better to catch for trouble shooting (possibly
		 * index corruption?)
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AOCS relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		AOCSSegmentFileTruncateToEOF(aorel, fsinfo);
		pfree(fsinfo);
	}

	if (segfile_array)
	{
		FreeAllAOCSSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
	UnregisterSnapshot(appendOnlyMetaDataSnapshot);
}