Example #1
/*
 * Fills in the relation statistics for an append-only relation.
 *
 *	This information is used to update the reltuples and relpages information
 *	in pg_class. reltuples matches the "pg_aoseg_<oid>:tupcount"
 *	column, and relpages is simulated by dividing the eof value
 *	("pg_aoseg_<oid>:eof") by the defined page size.
 */
void
vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot,
							 BlockNumber *rel_pages, double *rel_tuples,
							 bool *relhasindex)
{
	FileSegTotals *fstotal;
	BlockNumber nblocks;
	char	   *relname;
	double		num_tuples;
	double		totalbytes;
	double		eof;
	int64       hidden_tupcount;
	AppendOnlyVisimap visimap;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);

	/* get updated statistics from the pg_aoseg table */
	if (RelationIsAoRows(aorel))
	{
		fstotal = GetSegFilesTotals(aorel, snapshot);
	}
	else
	{
		Assert(RelationIsAoCols(aorel));
		fstotal = GetAOCSSSegFilesTotals(aorel, snapshot);
	}

	/* calculate the values we care about */
	eof = (double)fstotal->totalbytes;
	num_tuples = (double)fstotal->totaltuples;
	totalbytes = eof;
	nblocks = (uint32)RelationGuessNumberOfBlocks(totalbytes);

	AppendOnlyVisimap_Init(&visimap,
						   aorel->rd_appendonly->visimaprelid,
						   aorel->rd_appendonly->visimapidxid,
						   AccessShareLock,
						   snapshot);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	num_tuples -= hidden_tupcount;
	Assert(num_tuples > -1.0);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	elogif (Debug_appendonly_print_compaction, LOG,
			"Gather statistics after vacuum for append-only relation %s: "
			"page count %d, tuple count %f",
			relname,
			nblocks, num_tuples);

	*rel_pages = nblocks;
	*rel_tuples = num_tuples;
	*relhasindex = aorel->rd_rel->relhasindex;

	ereport(elevel,
			(errmsg("\"%s\": found %.0f rows in %u pages.",
					relname, num_tuples, nblocks)));
	pfree(fstotal);
}
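/*
 * Illustrative sketch (not from the original source): the relpages
 * simulation described above amounts to dividing the on-disk byte count
 * (the summed "eof" of the segment files) by the block size and rounding
 * up.  The helper below is a hypothetical stand-in for
 * RelationGuessNumberOfBlocks and assumes that is all it does; the 32 KB
 * block size is likewise an assumption (Greenplum's usual default).
 */
#include <stdint.h>

#define GUESSED_BLCKSZ 32768

static uint32_t
guess_number_of_blocks(double totalbytes)
{
	/* e.g. totalbytes = 100000 -> ceil(100000 / 32768) = 4 "pages" */
	return (uint32_t) ((totalbytes + GUESSED_BLCKSZ - 1) / GUESSED_BLCKSZ);
}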
/*
 * Returns true if the relation has no tuples.  Prepare phase of
 * compaction invokes this function on each QE.
 *
 * Examples of empty tables:
 * 1. parent of a partitioned table
 * 2. table that is created but no tuples have been inserted yet
 * 3. table from which all existing tuples are deleted and the table
 * is vacuumed.  This is a special case in which pg_aoseg_<oid> has a
 * non-zero number of rows but the tupcount value is zero for all rows.
 */
bool
AppendOnlyCompaction_IsRelationEmpty(Relation aorel)
{
	AppendOnlyEntry *aoEntry;
	Relation		pg_aoseg_rel;
	TupleDesc		pg_aoseg_dsc;
	HeapTuple		tuple;
	HeapScanDesc	aoscan;
	int				Anum_tupcount;
	bool empty = true;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));

	aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);
	pg_aoseg_rel = heap_open(aoEntry->segrelid, AccessShareLock);
	pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel);
	aoscan = heap_beginscan(pg_aoseg_rel, SnapshotNow, 0, NULL);
	Anum_tupcount = RelationIsAoRows(aorel)? Anum_pg_aoseg_tupcount: Anum_pg_aocs_tupcount;
	while ((tuple = heap_getnext(aoscan, ForwardScanDirection)) != NULL &&
		   empty)
	{
		if (0 < fastgetattr(tuple, Anum_tupcount,
							pg_aoseg_dsc, NULL))
			empty = false;
	}
	heap_endscan(aoscan);
	heap_close(pg_aoseg_rel, AccessShareLock);
	return empty;
}
Example #3
static void gp_statistics_estimate_reltuples_relpages_ao_rows(Relation rel, float4 *reltuples, float4 *relpages)
{
	FileSegTotals		*fstotal;
	AppendOnlyEntry *aoEntry;
	AppendOnlyVisimap visimap;
	int64 hidden_tupcount = 0;
	/**
	 * Ensure that the right kind of relation with the right type of storage is passed to us.
	 */
	Assert(rel->rd_rel->relkind == RELKIND_RELATION);
	Assert(RelationIsAoRows(rel));
	
	fstotal = GetSegFilesTotals(rel, SnapshotNow);
	Assert(fstotal);
	/**
	 * The planner doesn't understand AO's blocks, so we need this method to fudge up a number
	 * for the planner.
	 */
	*relpages = RelationGuessNumberOfBlocks((double)fstotal->totalbytes);

	aoEntry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
	AppendOnlyVisimap_Init(&visimap, aoEntry->visimaprelid, aoEntry->visimapidxid, AccessShareLock, SnapshotNow);
	hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&visimap);
	AppendOnlyVisimap_Finish(&visimap, AccessShareLock);

	/**
	 * The number of tuples in an AO table is known accurately, so we use that value directly.
	 */
	*reltuples = (double)(fstotal->totaltuples - hidden_tupcount);

	pfree(fstotal);
	pfree(aoEntry);
	
	return;
}
static void
LockSegfilesOnMasterForSingleRel(Relation rel, int32 segno)
{
	Insist(Gp_role == GP_ROLE_DISPATCH);

	/*
	 * do not lock segfile with content id = -1
	 */
	/*
	 for (i = 1; i < rel->rd_segfile0_count; ++i)
	 {
	 if (RelationIsAoRows(rel) || RelationIsParquet(rel))
	 {
	 LockRelationAppendOnlySegmentFile(&rel->rd_node, segno,
	 AccessExclusiveLock, false, i - 1);
	 }
	 }
	 */
	if (RelationIsAoRows(rel) || RelationIsParquet(rel))
	{
		LockRelationAppendOnlySegmentFile(&rel->rd_node, segno,
										  AccessExclusiveLock, false);
	}
}
Example #5
/*
 * calculate size of (one fork of) a relation
 *
 * Iterate over all files belonging to the relation and stat them.
 * The obviously better way is to use glob.  For whatever reason,
 * glob is extremely slow if there are lots of relations in the
 * database.  So we handle all cases ourselves, instead.
 *
 * Note: we can safely apply this to temp tables of other sessions, so there
 * is no check here or at the call sites for that.
 */
static int64
calculate_relation_size(Relation rel, ForkNumber forknum)
{
	int64		totalsize = 0;
	char	   *relationpath;
	char		pathname[MAXPGPATH];
	unsigned int segcount = 0;

	relationpath = relpathbackend(rel->rd_node, rel->rd_backend, forknum);

	if (RelationIsHeap(rel))
	{
		/*
		 * Ordinary relation, including heap and index.
		 * They take the form of relationpath, or relationpath.%d.
		 * There will be no holes, therefore, we can stop when
		 * we reach the first non-existing file.
		 */
		for (segcount = 0;; segcount++)
		{
			struct stat fst;

			CHECK_FOR_INTERRUPTS();

			if (segcount == 0)
				snprintf(pathname, MAXPGPATH, "%s",
						 relationpath);
			else
				snprintf(pathname, MAXPGPATH, "%s.%u",
						 relationpath, segcount);

			if (stat(pathname, &fst) < 0)
			{
				if (errno == ENOENT)
					break;
				else
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat file %s: %m", pathname)));
			}
			totalsize += fst.st_size;
		}
	}
	/* AO tables don't have any extra forks. */
	else if (forknum == MAIN_FORKNUM)
	{
		if (RelationIsAoRows(rel))
		{
			totalsize = GetAOTotalBytes(rel, GetActiveSnapshot());
		}
		else if (RelationIsAoCols(rel))
		{
			totalsize = GetAOCSTotalBytes(rel, GetActiveSnapshot(), true);
		}
	}

	/* RELSTORAGE_VIRTUAL has no space usage */
	return totalsize;
}
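/*
 * Illustrative sketch (not from the original source): a heap relation is
 * stored as <relationpath>, <relationpath>.1, <relationpath>.2, ..., each
 * segment capped at RELSEG_SIZE blocks (1 GB with stock build options --
 * an assumption here, since the limit is configurable at build time).  The
 * stat loop above therefore visits about ceil(size / 1 GB) files before the
 * final ENOENT probe stops it.
 */
#include <stdint.h>

#define ASSUMED_SEGMENT_BYTES ((int64_t) 1 << 30)	/* assumed 1 GB per segment file */

static unsigned int
expected_segment_files(int64_t totalsize)
{
	if (totalsize == 0)
		return 1;				/* the base file exists even for an empty relation */
	return (unsigned int) ((totalsize + ASSUMED_SEGMENT_BYTES - 1) / ASSUMED_SEGMENT_BYTES);
}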
Example #6
/**
 * Given the oid of a relation, this method calculates reltuples, relpages. This only looks up
 * local information (on master or segments). It produces meaningful values for AO and
 * heap tables and returns [0.0,0.0] for all other relations.
 * Input: 
 * 	relationoid
 * Output:
 * 	array of two values [reltuples,relpages]
 */
Datum
gp_statistics_estimate_reltuples_relpages_oid(PG_FUNCTION_ARGS)
{
	
	float4		relpages = 0.0;		
	float4		reltuples = 0.0;			
	Oid			relOid = PG_GETARG_OID(0);
	Datum		values[2];
	ArrayType   *result;
	
	Relation rel = try_relation_open(relOid, AccessShareLock, false);

	if (rel != NULL)
	{
		if (rel->rd_rel->relkind == RELKIND_RELATION)
		{
			if (RelationIsHeap(rel))
			{
				gp_statistics_estimate_reltuples_relpages_heap(rel, &reltuples, &relpages);
			}
			else if (RelationIsAoRows(rel))
			{
				gp_statistics_estimate_reltuples_relpages_ao_rows(rel, &reltuples, &relpages);
			}
			else if	(RelationIsAoCols(rel))
			{
				gp_statistics_estimate_reltuples_relpages_ao_cs(rel, &reltuples, &relpages);
			}
		}
		else if (rel->rd_rel->relkind == RELKIND_INDEX)
		{
			reltuples = 1.0;
			relpages = RelationGetNumberOfBlocks(rel);
		}
		else
		{
			/**
			 * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1.
			 */
		}
		relation_close(rel, AccessShareLock);
	}
	else
	{
		/**
		 * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1.
		 */
	}
	
	values[0] = Float4GetDatum(reltuples);
	values[1] = Float4GetDatum(relpages);

	result = construct_array(values, 2,
					FLOAT4OID,
					sizeof(float4), true, 'i');

	PG_RETURN_ARRAYTYPE_P(result);
}
/*
 * AppendOnlySegmentFileTruncateToEOF()
 *
 * Assumes that the segment file lock is already held.
 *
 * Truncates the segment file to its logical EOF.
 */
static void
AppendOnlySegmentFileTruncateToEOF(Relation aorel, 
		FileSegInfo *fsinfo)
{
	const char* relname = RelationGetRelationName(aorel);
	MirroredAppendOnlyOpen mirroredOpened;
	int32				   fileSegNo;
	char			filenamepath[MAXPGPATH];
	int				segno;
	int64			segeof;

	Assert(fsinfo);
	Assert(RelationIsAoRows(aorel));

	segno = fsinfo->segno;
	relname = RelationGetRelationName(aorel);
	segeof = (int64)fsinfo->eof;

	/* Open and truncate the relation segfile beyond its eof */
	MakeAOSegmentFileName(aorel, segno, -1, &fileSegNo, filenamepath);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Opening AO relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
		   get_namespace_name(RelationGetNamespace(aorel)),
		   relname,
		   aorel->rd_id,
		   aorel->rd_node.relNode,
		   segno,
		   segeof);

	if (OpenAOSegmentFile(aorel, filenamepath, fileSegNo, segeof, &mirroredOpened))
	{
		TruncateAOSegmentFile(&mirroredOpened, aorel, segeof, ERROR);
		CloseAOSegmentFile(&mirroredOpened);

		elogif(Debug_appendonly_print_compaction, LOG,
				 "Successfully truncated AO ROL relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				 get_namespace_name(RelationGetNamespace(aorel)),
				 relname,
				 aorel->rd_id,
				 aorel->rd_node.relNode,
				 segno,
				 segeof);
	}
	else
	{
		elogif(Debug_appendonly_print_compaction, LOG,
				 "No gp_relation_node entry for AO ROW relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				 get_namespace_name(RelationGetNamespace(aorel)),
				 relname,
				 aorel->rd_id,
				 aorel->rd_node.relNode,
				 segno,
				 segeof);
	}
}
Example #8
/*
 * calculate size of a relation
 *
 * Iterate over all files belonging to the relation and stat them.
 * The obviously better way is to use glob.  For whatever reason,
 * glob is extremely slow if there are lots of relations in the
 * database.  So we handle all cases ourselves, instead.
 */
int64
calculate_relation_size(Relation rel)
{
	int64		totalsize = 0;
	char	   *relationpath;
	char		pathname[MAXPGPATH];

    struct stat fst;
    int i;

	relationpath = relpath(rel->rd_node);

    if(RelationIsHeap(rel))
    {
        /* Ordinary relation, including heap and index.
         * They take the form of relationpath, or relationpath.%d.
         * There will be no holes, therefore, we can stop when
         * we reach the first non-existent file.
         */
        for(i=0; ; ++i)
        {
            if (i==0)
                snprintf(pathname, MAXPGPATH, "%s", relationpath); 
            else
                snprintf(pathname, MAXPGPATH, "%s.%d", relationpath, i);

            if (stat(pathname, &fst) >= 0)
                totalsize += fst.st_size;
            else
            {
                if (errno == ENOENT)
                    break;
                else
                    ereport(ERROR, (errcode_for_file_access(), 
                                    errmsg("could not stat file %s: %m", pathname)
                                ));
            }
        }
    }
	else if (RelationIsAoRows(rel))
		totalsize = GetAOTotalBytes(rel, SnapshotNow);
	else if (RelationIsParquet(rel))
		totalsize = GetParquetTotalBytes(rel, SnapshotNow);
           
    /* RELSTORAGE_VIRTUAL has no space usage */
    return totalsize;
}
/*
 * Has the same signature as RelationGetAttributeCompressionFuncs() even though
 * we don't actually need the full Relation data structure. I deem consistency
 * of API more important in this case.
 */
PGFunction *
RelationGetRelationCompressionFuncs(Relation rel)
{
	AppendOnlyEntry *aoentry;
	char *comptype = NULL;
	PGFunction *compFuncs;

	if(RelationIsAoRows(rel) || RelationIsParquet(rel)){
		aoentry = GetAppendOnlyEntry(RelationGetRelid(rel), SnapshotNow);
		comptype = aoentry->compresstype;
	}

	compFuncs =	get_funcs_for_compression(comptype);

	return compFuncs;

}
Example #10
/*
 *	Compute the on-disk size of files for the relation according to the
 *	stat function, including heap data, index data, toast data, aoseg data,
 *  aoblkdir data, and aovisimap data.
 */
static int64
calculate_total_relation_size(Oid Relid)
{
	Relation	heapRel;
	Oid			toastOid;
	AppendOnlyEntry *aoEntry = NULL;
	int64		size;
	ListCell   *cell;

	heapRel = try_relation_open(Relid, AccessShareLock, false);

	if (!RelationIsValid(heapRel))
		return 0;

	toastOid = heapRel->rd_rel->reltoastrelid;

	if (RelationIsAoRows(heapRel) || RelationIsAoCols(heapRel))
		aoEntry = GetAppendOnlyEntry(Relid, SnapshotNow);
	
	/* Get the heap size */
	if (Relid == 0 || heapRel->rd_node.relNode == 0)
		size = 0;
	else
		size = calculate_relation_size(heapRel); 

	/* Include any dependent indexes */
	if (heapRel->rd_rel->relhasindex)
	{
		List	   *index_oids = RelationGetIndexList(heapRel);

		foreach(cell, index_oids)
		{
			Oid			idxOid = lfirst_oid(cell);
			Relation	iRel;

			iRel = try_relation_open(idxOid, AccessShareLock, false);

			if (RelationIsValid(iRel))
			{
				size += calculate_relation_size(iRel); 

				relation_close(iRel, AccessShareLock);
			}
		}
Example #11
/*
 * getTableType
 *   Return the table type for a given relation.
 */
int
getTableType(Relation rel)
{
	Assert(rel != NULL && rel->rd_rel != NULL);
	
	if (RelationIsHeap(rel))
	{
		return TableTypeHeap;
	}

	if (RelationIsAoRows(rel))
	{
		return TableTypeAppendOnly;
	}
	
	if (RelationIsParquet(rel))
	{
		return TableTypeParquet;
	}

	elog(ERROR, "undefined table type for storage format: %c", rel->rd_rel->relstorage);
	return TableTypeInvalid;
}
Example #12
/*
 * InitScanStateRelationDetails
 *   Opens a relation and sets various relation specific ScanState fields.
 */
void
InitScanStateRelationDetails(ScanState *scanState, Plan *plan, EState *estate)
{
	Assert(NULL != scanState);
	PlanState *planState = &scanState->ps;

	/* Initialize child expressions */
	planState->targetlist = (List *)ExecInitExpr((Expr *)plan->targetlist, planState);
	planState->qual = (List *)ExecInitExpr((Expr *)plan->qual, planState);

	Relation currentRelation = ExecOpenScanRelation(estate, ((Scan *)plan)->scanrelid);
	scanState->ss_currentRelation = currentRelation;

	if (RelationIsAoRows(currentRelation) || RelationIsParquet(currentRelation))
	{
		scanState->splits = GetFileSplitsOfSegment(estate->es_plannedstmt->scantable_splits,
												   currentRelation->rd_id, GetQEIndex());
	}

	ExecAssignScanType(scanState, RelationGetDescr(currentRelation));
	ExecAssignScanProjectionInfo(scanState);

	scanState->tableType = getTableType(scanState->ss_currentRelation);
}
Example #13
/*
 *	vacuum_appendonly_rel() -- vacuum an append-only relation
 *
 *		This procedure will be what gets executed both for VACUUM
 *		and VACUUM FULL (and also ANALYZE or any other thing that
 *		needs the pg_class stats updated).
 *
 *		The function can compact append-only segment files or just
 *		truncate the segment file to its existing eof.
 *
 *		Afterwards, the reltuples and relpages information in pg_class
 *		is updated. reltuples matches the "pg_aoseg_<oid>:tupcount"
 *		column, and relpages is simulated by dividing the eof value
 *		("pg_aoseg_<oid>:eof") by the defined page size.
 *
 *		Txn ids, hint bits, free space, dead tuples, etc. are all
 *		irrelevant in the append-only relation context.
 */
void
vacuum_appendonly_rel(Relation aorel, VacuumStmt *vacstmt)
{
	char	   *relname;
	PGRUsage	ru0;

	Assert(RelationIsAoRows(aorel) || RelationIsAoCols(aorel));
	Assert(!vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt));

	pg_rusage_init(&ru0);
	relname = RelationGetRelationName(aorel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(aorel)),
					relname)));

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		return;
	}
	Assert(list_length(vacstmt->appendonly_compaction_insert_segno) <= 1);
	if (vacstmt->appendonly_compaction_insert_segno == NULL)
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			"Vacuum drop phase %s", RelationGetRelationName(aorel));

		if (RelationIsAoRows(aorel))
		{
			AppendOnlyDrop(aorel, vacstmt->appendonly_compaction_segno);
		}
		else
		{
			Assert(RelationIsAoCols(aorel));
			AOCSDrop(aorel, vacstmt->appendonly_compaction_segno);
		}
	}
	else
	{
		int insert_segno = linitial_int(vacstmt->appendonly_compaction_insert_segno);
		if (insert_segno == APPENDONLY_COMPACTION_SEGNO_INVALID)
		{
			elogif(Debug_appendonly_print_compaction, LOG,
			"Vacuum pseudo-compaction phase %s", RelationGetRelationName(aorel));
		}
		else
		{
			elogif(Debug_appendonly_print_compaction, LOG,
				"Vacuum compaction phase %s", RelationGetRelationName(aorel));
			if (RelationIsAoRows(aorel))
			{
				AppendOnlyCompact(aorel,
					vacstmt->appendonly_compaction_segno,
					insert_segno, vacstmt->full);
			}
			else
			{
				Assert(RelationIsAoCols(aorel));
				AOCSCompact(aorel,
					vacstmt->appendonly_compaction_segno,
					insert_segno, vacstmt->full);
			}
		}
	}
}
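/*
 * Illustrative sketch (not from the original source): on a QE,
 * vacuum_appendonly_rel() above dispatches on the compaction insert segno
 * list -- no entry means the drop phase, an APPENDONLY_COMPACTION_SEGNO_INVALID
 * entry means a pseudo-compaction, and any other segno means a real
 * compaction into that segment.  The enum and helper below are hypothetical
 * names that just restate that branching.
 */
typedef enum
{
	AO_VACUUM_DROP_PHASE,
	AO_VACUUM_PSEUDO_COMPACTION_PHASE,
	AO_VACUUM_COMPACTION_PHASE
} AoVacuumPhaseSketch;

static AoVacuumPhaseSketch
classify_ao_vacuum_phase(const int *insert_segno,	/* NULL when the list is empty */
						 int invalid_segno_marker)	/* APPENDONLY_COMPACTION_SEGNO_INVALID */
{
	if (insert_segno == NULL)
		return AO_VACUUM_DROP_PHASE;
	if (*insert_segno == invalid_segno_marker)
		return AO_VACUUM_PSEUDO_COMPACTION_PHASE;
	return AO_VACUUM_COMPACTION_PHASE;
}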
Example #14
/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs have to add the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 *		Insert can be part of an update operation when
 *		there is a preceding SplitUpdate node. 
 * ----------------------------------------------------------------
 */
void
ExecInsert(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	void		*tuple = NULL;
	ResultRelInfo *resultRelInfo = NULL;
	Relation	resultRelationDesc = NULL;
	Oid			newId = InvalidOid;
	TupleTableSlot *partslot = NULL;

	AOTupleId	aoTupleId = AOTUPLEID_INIT;

	bool		rel_is_heap = false;
	bool 		rel_is_aorows = false;
	bool		rel_is_aocols = false;
	bool		rel_is_external = false;

	/*
	 * get information on the (current) result relation
	 */
	if (estate->es_result_partitions)
	{
		resultRelInfo = slot_get_partition(slot, estate);

		/* Check whether the user provided the correct leaf part only if required */
		if (!dml_ignore_target_partition_check)
		{
			Assert(NULL != estate->es_result_partitions->part &&
					NULL != resultRelInfo->ri_RelationDesc);

			List *resultRelations = estate->es_plannedstmt->resultRelations;
			/*
			 * Only inheritance can generate multiple result relations, and inheritance
			 * is not compatible with partitions. As we are inserting into a partitioned
			 * table, we should not have more than one resultRelation.
			 */
			Assert(list_length(resultRelations) == 1);
			/* We only have one resultRelations entry where the user originally intended to insert */
			int rteIdxForUserRel = linitial_int(resultRelations);
			Assert (rteIdxForUserRel > 0);
			Oid userProvidedRel = InvalidOid;

			if (1 == rteIdxForUserRel)
			{
				/* Optimization for typical case */
				userProvidedRel = ((RangeTblEntry *) estate->es_plannedstmt->rtable->head->data.ptr_value)->relid;
			}
			else
			{
				userProvidedRel = getrelid(rteIdxForUserRel, estate->es_plannedstmt->rtable);
			}

			/* Error out if user provides a leaf partition that does not match with our calculated partition */
			if (userProvidedRel != estate->es_result_partitions->part->parrelid &&
				userProvidedRel != resultRelInfo->ri_RelationDesc->rd_id)
			{
				ereport(ERROR,
						(errcode(ERRCODE_CHECK_VIOLATION),
						 errmsg("Trying to insert row into wrong partition"),
						 errdetail("Expected partition: %s, provided partition: %s",
							resultRelInfo->ri_RelationDesc->rd_rel->relname.data,
							estate->es_result_relation_info->ri_RelationDesc->rd_rel->relname.data)));
			}
		}
		estate->es_result_relation_info = resultRelInfo;
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}

	Assert (!resultRelInfo->ri_projectReturning);

	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	rel_is_heap = RelationIsHeap(resultRelationDesc);
	rel_is_aocols = RelationIsAoCols(resultRelationDesc);
	rel_is_aorows = RelationIsAoRows(resultRelationDesc);
	rel_is_external = RelationIsExternal(resultRelationDesc);

	partslot = reconstructMatchingTupleSlot(slot, resultRelInfo);
	if (rel_is_heap)
	{
		tuple = ExecFetchSlotHeapTuple(partslot);
	}
	else if (rel_is_aorows)
	{
		tuple = ExecFetchSlotMemTuple(partslot, false);
	}
	else if (rel_is_external) 
	{
		if (estate->es_result_partitions && 
			estate->es_result_partitions->part->parrelid != 0)
		{
			ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				errmsg("Insert into external partitions not supported.")));			
			return;
		}
		else
		{
			tuple = ExecFetchSlotHeapTuple(partslot);
		}
	}
	else
	{
		Assert(rel_is_aocols);
		tuple = ExecFetchSlotMemTuple(partslot, true);
	}

	Assert(partslot != NULL && tuple != NULL);

	/* Execute triggers in Planner-generated plans */
	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW INSERT Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
		{
			HeapTuple	newtuple;

			/* NYI */
			if(rel_is_aocols)
				elog(ERROR, "triggers are not supported on tables that use column-oriented storage");

			newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

			if (newtuple == NULL)	/* "do nothing" */
			{
				return;
			}

			if (newtuple != tuple)	/* modified by Trigger(s) */
			{
				/*
				 * Put the modified tuple into a slot for convenience of routines
				 * below.  We assume the tuple was allocated in per-tuple memory
				 * context, and therefore will go away by itself. The tuple table
				 * slot should not try to clear it.
				 */
				TupleTableSlot *newslot = estate->es_trig_tuple_slot;

				if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor)
					ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor);
				ExecStoreGenericTuple(newtuple, newslot, false);
				newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */
				tuple = newtuple;
				partslot = newslot;
			}
		}
	}
	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr &&
			planGen == PLANGEN_PLANNER)
	{
		ExecConstraints(resultRelInfo, partslot, estate);
	}
	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * NOTE: for append-only relations we use the append-only access methods.
	 */
	if (rel_is_aorows)
	{
		if (resultRelInfo->ri_aoInsertDesc == NULL)
		{
			/* Set the pre-assigned fileseg number to insert into */
			ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);

			resultRelInfo->ri_aoInsertDesc =
				appendonly_insert_init(resultRelationDesc,
									   ActiveSnapshot,
									   resultRelInfo->ri_aosegno,
									   false);

		}

		appendonly_insert(resultRelInfo->ri_aoInsertDesc, tuple, &newId, &aoTupleId);
	}
	else if (rel_is_aocols)
	{
		if (resultRelInfo->ri_aocsInsertDesc == NULL)
		{
			ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);
			resultRelInfo->ri_aocsInsertDesc = aocs_insert_init(resultRelationDesc, 
																resultRelInfo->ri_aosegno, false);
		}

		newId = aocs_insert(resultRelInfo->ri_aocsInsertDesc, partslot);
		aoTupleId = *((AOTupleId*)slot_get_ctid(partslot));
	}
	else if (rel_is_external)
	{
		/* Writable external table */
		if (resultRelInfo->ri_extInsertDesc == NULL)
			resultRelInfo->ri_extInsertDesc = external_insert_init(resultRelationDesc);

		newId = external_insert(resultRelInfo->ri_extInsertDesc, tuple);
	}
	else
	{
		Insist(rel_is_heap);

		newId = heap_insert(resultRelationDesc,
							tuple,
							estate->es_snapshot->curcid,
							true, true, GetCurrentTransactionId());
	}

	IncrAppended();
	(estate->es_processed)++;
	(resultRelInfo->ri_aoprocessed)++;
	estate->es_lastoid = newId;

	partslot->tts_tableOid = RelationGetRelid(resultRelationDesc);

	if (rel_is_aorows || rel_is_aocols)
	{
		/*
		 * insert index entries for the AO tuple (row- or column-oriented)
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false);
	}
	else
	{
		/* Use parttuple for index update in case this is an indexed heap table. */
		TupleTableSlot *xslot = partslot;
		void *xtuple = tuple;

		setLastTid(&(((HeapTuple) xtuple)->t_self));

		/*
		 * insert index entries for tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(xslot, &(((HeapTuple) xtuple)->t_self), estate, false);

	}

	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW INSERT Triggers */
		ExecARInsertTriggers(estate, resultRelInfo, tuple);
	}
}
Example #15
void
AlterTableCreateAoBlkdirTable(Oid relOid, bool is_part_child)
{
	Relation	rel;
	TupleDesc	tupdesc;
	IndexInfo  *indexInfo;
	Oid			classObjectId[3];
	int16		coloptions[3];

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel)) {
		heap_close(rel, NoLock);
		return;
	}

	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(4, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "segno",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "columngroup_no",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "first_row_no",
					   INT8OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 4,
					   "minipage",
					   VARBITOID,
					   -1, 0);

	/*
	 * We don't want any toast columns here.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';
    /* TODO (dmeister): In the next line, the index should have been 3. 
     * Therefore the minipage might be toasted.
     */
	tupdesc->attrs[2]->attstorage = 'p'; 

	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 3;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_KeyAttrNumbers[2] = 3;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_Concurrent = false;
	
	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT4_BTREE_OPS_OID;
	classObjectId[2] = INT8_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;
	coloptions[2] = 0;

	(void) CreateAOAuxiliaryTable(rel,
			"pg_aoblkdir",
			RELKIND_AOBLOCKDIR,
			tupdesc, indexInfo, classObjectId, coloptions);

	heap_close(rel, NoLock);
}
Example #16
/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed.
 *		DELETE can be part of an update operation when
 *		there is a preceding SplitUpdate node. 
 *
 * ----------------------------------------------------------------
 */
void
ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	ResultRelInfo *resultRelInfo;
	Relation resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * Get information on the (current) result relation.
	 */
	if (estate->es_result_partitions && planGen == PLANGEN_OPTIMIZER)
	{
		Assert(estate->es_result_partitions->part->parrelid);

#ifdef USE_ASSERT_CHECKING
		Oid parent = estate->es_result_partitions->part->parrelid;
#endif

		/* Obtain part for current tuple. */
		resultRelInfo = slot_get_partition(planSlot, estate);
		estate->es_result_relation_info = resultRelInfo;

#ifdef USE_ASSERT_CHECKING
		Oid part = RelationGetRelid(resultRelInfo->ri_RelationDesc);
#endif

		Assert(parent != part);
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	Assert (!resultRelInfo->ri_projectReturning);

	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW DELETE Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
		{
			bool		dodelete;

			dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
											estate->es_snapshot->curcid);

			if (!dodelete)			/* "do nothing" */
				return;
		}
	}

	bool isHeapTable = RelationIsHeap(resultRelationDesc);
	bool isAORowsTable = RelationIsAoRows(resultRelationDesc);
	bool isAOColsTable = RelationIsAoCols(resultRelationDesc);
	bool isExternalTable = RelationIsExternal(resultRelationDesc);

	if (isExternalTable && estate->es_result_partitions && 
		estate->es_result_partitions->part->parrelid != 0)
	{
		ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			errmsg("Delete from external partitions not supported.")));			
		return;
	}
	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
ldelete:;
	if (isHeapTable)
	{
		result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	}
	else if (isAORowsTable)
	{
		if (IsXactIsoLevelSerializable)
		{
			if (!isUpdate)
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Deletes on append-only tables are not supported in serializable transactions.")));		
			else
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Updates on append-only tables are not supported in serializable transactions.")));	
		}

		if (resultRelInfo->ri_deleteDesc == NULL)
		{
			resultRelInfo->ri_deleteDesc = 
				appendonly_delete_init(resultRelationDesc, ActiveSnapshot);
		}

		AOTupleId* aoTupleId = (AOTupleId*)tupleid;
		result = appendonly_delete(resultRelInfo->ri_deleteDesc, aoTupleId);
	} 
	else if (isAOColsTable)
	{
		if (IsXactIsoLevelSerializable)
		{
			if (!isUpdate)
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Deletes on append-only tables are not supported in serializable transactions.")));		
			else
				ereport(ERROR,
					   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("Updates on append-only tables are not supported in serializable transactions.")));		
		}

		if (resultRelInfo->ri_deleteDesc == NULL)
		{
			resultRelInfo->ri_deleteDesc = 
				aocs_delete_init(resultRelationDesc);
		}

		AOTupleId* aoTupleId = (AOTupleId*)tupleid;
		result = aocs_delete(resultRelInfo->ri_deleteDesc, aoTupleId);
	}
	else
	{
		Insist(0);
	}
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
		
			/*
			 * In a scenario in which R(a,b) and S(a,b) contain
			 *        R               S
			 *    ________         ________
			 *     (1, 1)           (1, 2)
			 *                      (1, 7)
			 *
			 * an update query such as:
			 *   UPDATE R SET a = S.b  FROM S WHERE R.b = S.a;
			 *
			 * has a non-deterministic output: the tuple in R
			 * can be updated to (2,1) or (7,1).
			 * Since the introduction of SplitUpdate, these queries
			 * send multiple requests to delete the same tuple. Therefore,
			 * in order to avoid a non-deterministic output,
			 * an error is reported in such a scenario.
			 */
			if (isUpdate)
			{

				ereport(ERROR,
					(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION ),
					errmsg("multiple updates to a row by the same query is not allowed")));
			}

			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	if (!isUpdate)
	{
		IncrDeleted();
		(estate->es_processed)++;
		/*
		 * Notify the master whether tuples were deleted, so that mod_count
		 * can be updated.
		 */
		(resultRelInfo->ri_aoprocessed)++;
	}

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */


	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW DELETE Triggers */
		ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
	}
}
Example #17
/*
 * Returns true iff the given segment file should be compacted.
 */
bool
AppendOnlyCompaction_ShouldCompact(
	Relation aoRelation,
	AppendOnlyEntry *aoEntry,
	int segno,
	int64 segmentTotalTupcount,
	bool isFull)
{
	bool result;
	AppendOnlyVisimap visiMap;
	int64 hiddenTupcount;
	int hideRatio;

	Assert(RelationIsAoRows(aoRelation) || RelationIsAoCols(aoRelation));

	if (!gp_appendonly_compaction)
	{
		ereport(LOG,
			(errmsg("Append-only compaction skipped on relation %s, segment file num %d",
				RelationGetRelationName(aoRelation),
				segno),
			 errdetail("Compaction is disabled")));
		/* Disable compaction by global guc. */
		return false;
	}

	AppendOnlyVisimap_Init(&visiMap,
			aoEntry->visimaprelid,
			aoEntry->visimapidxid,
			ShareLock,
			SnapshotNow);
	hiddenTupcount = AppendOnlyVisimap_GetSegmentFileHiddenTupleCount(
			&visiMap, segno);

	result = true;
	if (isFull && hiddenTupcount > 0)
	{
		/* 
		 * if it is a full vacuum and there is any obsolete data, do a compaction
		 */
		result = true;
	}
	else
	{
		hideRatio = AppendOnlyCompaction_GetHideRatio(hiddenTupcount, segmentTotalTupcount);
		if (hideRatio <= gp_appendonly_compaction_threshold || gp_appendonly_compaction_threshold == 0)
		{
			if (hiddenTupcount > 0)
			{
				ereportif(Debug_appendonly_print_compaction, LOG, 
					(errmsg("Append-only compaction skipped on relation %s, segment file num %d, "
					"hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " 
					"hide ratio %d%%, threshold %d%%",
					RelationGetRelationName(aoRelation),
					segno,
					hiddenTupcount, segmentTotalTupcount, 
					hideRatio, gp_appendonly_compaction_threshold)));
				ereport(LOG,
					(errmsg("Append-only compaction skipped on relation %s, segment file num %d",
					RelationGetRelationName(aoRelation),
					segno),
					errdetail("Ratio of obsolete tuples below threshold (%d%% vs %d%%)",
						hideRatio, gp_appendonly_compaction_threshold)));
			}
			else
			{
				ereportif(Debug_appendonly_print_compaction, LOG, 
					(errmsg("Append-only compaction skipped on relation %s, segment file num %d, "
					"hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " 
					"hide ratio %d%%, threshold %d%%",
					RelationGetRelationName(aoRelation),
					segno,
					hiddenTupcount, segmentTotalTupcount, 
					hideRatio, gp_appendonly_compaction_threshold)));
			}
			result = false;
		}
		elogif(Debug_appendonly_print_compaction, LOG, 
			"Schedule compaction: "
			"segno %d, "
			"hidden tupcount " INT64_FORMAT ", total tupcount " INT64_FORMAT ", " 
			"hide ratio %d%%, threshold %d%%",
			segno,
			hiddenTupcount, segmentTotalTupcount, 
			hideRatio, gp_appendonly_compaction_threshold);
	}
	AppendOnlyVisimap_Finish(&visiMap, ShareLock);
	return result;
}
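/*
 * Illustrative sketch (not from the original source): the decision above
 * boils down to "a full vacuum compacts whenever there are hidden
 * (deleted/updated) tuples; otherwise compact only when the hidden share
 * exceeds gp_appendonly_compaction_threshold percent and the threshold is
 * not zero".  The helper names are hypothetical, and the sketch assumes
 * AppendOnlyCompaction_GetHideRatio returns a whole-number percentage.
 */
#include <stdbool.h>
#include <stdint.h>

static int
hide_ratio_percent(int64_t hidden, int64_t total)
{
	if (total == 0)
		return 0;
	return (int) ((hidden * 100) / total);
}

static bool
should_compact_sketch(int64_t hidden, int64_t total, int threshold_percent, bool is_full)
{
	if (is_full && hidden > 0)
		return true;
	/* e.g. hidden = 30, total = 100 -> 30%, compacted when the threshold is 10% */
	return threshold_percent != 0 &&
		hide_ratio_percent(hidden, total) > threshold_percent;
}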
Example #18
/*
 * Create append-only auxiliary relations for target relation rel.
 * Returns true if they are newly created.  If pg_appendonly already
 * knows about those tables, don't create them and return false.
 */
bool
CreateAOAuxiliaryTable(
		Relation rel,
		const char *auxiliaryNamePrefix,
		char relkind,
		TupleDesc tupledesc,
		IndexInfo  *indexInfo,
		Oid	*classObjectId,
		int16 *coloptions)
{
	char aoauxiliary_relname[NAMEDATALEN];
	char aoauxiliary_idxname[NAMEDATALEN];
	bool shared_relation;
	Oid relOid, aoauxiliary_relid = InvalidOid;
	Oid aoauxiliary_idxid = InvalidOid;
	ObjectAddress baseobject;
	ObjectAddress aoauxiliaryobject;

	Assert(RelationIsValid(rel));
	Assert(RelationIsAoRows(rel) || RelationIsAoCols(rel));
	Assert(auxiliaryNamePrefix);
	Assert(tupledesc);
	Assert(classObjectId);
	if (relkind != RELKIND_AOSEGMENTS)
		Assert(indexInfo);

	shared_relation = rel->rd_rel->relisshared;
	/*
	 * We cannot allow creating an auxiliary table for a shared relation
	 * after initdb (because there's no way to let other databases know
	 * about this auxiliary relation).
	 */
	if (shared_relation && !IsBootstrapProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared tables cannot have append-only auxiliary relations after initdb")));

	relOid = RelationGetRelid(rel);

	switch(relkind)
	{
		case RELKIND_AOVISIMAP:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL,
				NULL, NULL, &aoauxiliary_relid, &aoauxiliary_idxid);
			break;
		case RELKIND_AOBLOCKDIR:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL,
				&aoauxiliary_relid, &aoauxiliary_idxid, NULL, NULL);
			break;
		case RELKIND_AOSEGMENTS:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow,
				&aoauxiliary_relid,
				NULL, NULL, NULL, NULL);
			break;
		default:
			elog(ERROR, "unsupported auxiliary relkind '%c'", relkind);
	}

	/*
	 * Does it have the auxiliary relation?
	 */
	if (OidIsValid(aoauxiliary_relid))
	{
		return false;
	}

	snprintf(aoauxiliary_relname, sizeof(aoauxiliary_relname),
			 "%s_%u", auxiliaryNamePrefix, relOid);
	snprintf(aoauxiliary_idxname, sizeof(aoauxiliary_idxname),
			 "%s_%u_index", auxiliaryNamePrefix, relOid);

	/*
	 * We place the auxiliary relation in the pg_aoseg namespace
	 * even if its master relation is a temp table. There cannot be
	 * any naming collision, and the auxiliary relation will be
	 * destroyed when its master is, so there is no need to handle
	 * the aovisimap relation as temp.
	 */
	aoauxiliary_relid = heap_create_with_catalog(aoauxiliary_relname,
											     PG_AOSEGMENT_NAMESPACE,
											     rel->rd_rel->reltablespace,
											     InvalidOid,
											     rel->rd_rel->relowner,
											     tupledesc,
											     /* relam */ InvalidOid,
											     relkind,
											     RELSTORAGE_HEAP,
											     shared_relation,
											     true,
											     /* bufferPoolBulkLoad */ false,
											     0,
											     ONCOMMIT_NOOP,
											     NULL, /* GP Policy */
											     (Datum) 0,
											     true,
												 /* valid_opts */ false,
											     /* persistentTid */ NULL,
											     /* persistentSerialNum */ NULL);

	/* Make this table visible, else index creation will fail */
	CommandCounterIncrement();

	/* Create an index on AO auxiliary tables (like visimap) except for pg_aoseg table */
	if (relkind != RELKIND_AOSEGMENTS)
	{
		aoauxiliary_idxid = index_create(aoauxiliary_relid,
										 aoauxiliary_idxname,
										 InvalidOid,
										 indexInfo,
										 BTREE_AM_OID,
										 rel->rd_rel->reltablespace,
										 classObjectId, coloptions, (Datum) 0,
										 true, false, true, false,
										 false, NULL);

		/* Unlock target table -- no one can see it */
		UnlockRelationOid(aoauxiliary_relid, ShareLock);

		/* Unlock the index -- no one can see it anyway */
		UnlockRelationOid(aoauxiliary_idxid, AccessExclusiveLock);
	}

	/*
	 * Store the auxiliary table's OID in the parent relation's pg_appendonly row.
	 * TODO (How to generalize this?)
	 */
	switch (relkind)
	{
		case RELKIND_AOVISIMAP:
			UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid,
								 InvalidOid, InvalidOid,
								 aoauxiliary_relid, aoauxiliary_idxid);
			break;
		case RELKIND_AOBLOCKDIR:
			UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid,
								 aoauxiliary_relid, aoauxiliary_idxid,
								 InvalidOid, InvalidOid);
			break;
		case RELKIND_AOSEGMENTS:
			UpdateAppendOnlyEntryAuxOids(relOid,
								 aoauxiliary_relid,
								 InvalidOid, InvalidOid,
								 InvalidOid, InvalidOid);
			break;
		default:
			elog(ERROR, "unsupported auxiliary relkind '%c'", relkind);
	}

	/*
	 * Register dependency from the auxiliary table to the master, so that the
	 * aoseg table will be deleted if the master is.
	 */
	baseobject.classId = RelationRelationId;
	baseobject.objectId = relOid;
	baseobject.objectSubId = 0;
	aoauxiliaryobject.classId = RelationRelationId;
	aoauxiliaryobject.objectId = aoauxiliary_relid;
	aoauxiliaryobject.objectSubId = 0;

	recordDependencyOn(&aoauxiliaryobject, &baseobject, DEPENDENCY_INTERNAL);

	/*
	 * Make changes visible
	 */
	CommandCounterIncrement();

	return true;
}
Example #19
void
AlterTableCreateAoVisimapTable(Oid relOid, bool is_part_child)
{
	Relation	rel;
	IndexInfo  *indexInfo;
	TupleDesc	tupdesc;
	Oid			classObjectId[2];
	int16		coloptions[2];

	elogif(Debug_appendonly_print_visimap, LOG,
		   "Create visimap for relation %d",
		   relOid);

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel))
	{
		heap_close(rel, NoLock);
		return;
	}

	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(Natts_pg_aovisimap, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "segno",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "first_row_no",
					   INT8OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "visimap",
					   BYTEAOID,
					   -1, 0);

	/*
	 * We don't want any toast columns here.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';

	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 2;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_Concurrent = false;

	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT8_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;

	(void) CreateAOAuxiliaryTable(rel,
								  "pg_aovisimap",
								  RELKIND_AOVISIMAP,
								  tupdesc, indexInfo, classObjectId, coloptions);

	heap_close(rel, NoLock);
}
Example #20
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	if (Gp_role == GP_ROLE_DISPATCH)
		elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeSegmentFileDropPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeCleanupPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to correct value
	 * for an appendonly (AO/CO) table.
	 */
	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return;
	}

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* heap relation */

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats, vacstmt->extra_oids);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
		lazy_truncate_heap(onerel, vacrelstats);

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	/* Update statistics in pg_class */
	vac_update_relstats(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze, vacrelstats->rel_tuples);
}
Example #21
/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted.  This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database.
 * ----------------------------------------------------------------
 */
void
ExecUpdate(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	void*	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;
	AOTupleId	aoTupleId = AOTUPLEID_INIT;
	TupleTableSlot *partslot = NULL;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");
	
	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	bool		rel_is_heap = RelationIsHeap(resultRelationDesc);
	bool 		rel_is_aorows = RelationIsAoRows(resultRelationDesc);
	bool		rel_is_aocols = RelationIsAoCols(resultRelationDesc);
	bool		rel_is_external = RelationIsExternal(resultRelationDesc);

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	if (rel_is_heap)
	{
		partslot = slot;
		tuple = ExecFetchSlotHeapTuple(partslot);
	}
	else if (rel_is_aorows || rel_is_aocols)
	{
		/*
		 * It is necessary to reconstruct a logically compatible tuple to
		 * a physically compatible tuple.  The slot's tuple descriptor comes
		 * from the projection target list, which doesn't indicate dropped
		 * columns, and MemTuple cannot deal with such cases without converting
		 * the target list back into the original relation's tuple desc.
		 */
		partslot = reconstructMatchingTupleSlot(slot, resultRelInfo);

		/*
		 * We directly inline toasted columns here as update with toasted columns
		 * would create two references to the same toasted value.
		 */
		tuple = ExecFetchSlotMemTuple(partslot, true);
	}
	else if (rel_is_external) 
	{
		if (estate->es_result_partitions && 
			estate->es_result_partitions->part->parrelid != 0)
		{
			ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				errmsg("Update external partitions not supported.")));			
			return;
		}
		else
		{
			partslot = slot;
			tuple = ExecFetchSlotHeapTuple(partslot);
		}
	}
	else 
	{
		Insist(false);
	}

	/* see if this update would move the tuple to a different partition */
	if (estate->es_result_partitions)
		checkPartitionUpdate(estate, partslot, resultRelInfo);

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
										tupleid, tuple,
										estate->es_snapshot->curcid);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor);
			ExecStoreGenericTuple(newtuple, newslot, false);
            newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */
			partslot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 *
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
	 */
lreplace:;
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, partslot, estate);

	if (!GpPersistent_IsPersistentRelation(resultRelationDesc->rd_id))
	{
		/*
		 * Normal UPDATE path.
		 */

		/*
		 * replace the heap tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
		 * the row to be updated is visible to that snapshot, and throw a can't-
		 * serialize error if not.	This is a special-case behavior needed for
		 * referential integrity updates in serializable transactions.
		 */
		if (rel_is_heap)
		{
			result = heap_update(resultRelationDesc, tupleid, tuple,
							 &update_ctid, &update_xmax,
							 estate->es_snapshot->curcid,
							 estate->es_crosscheck_snapshot,
							 true /* wait for commit */ );
		} 
		else if (rel_is_aorows)
		{
			if (IsXactIsoLevelSerializable)
			{
				ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					errmsg("Updates on append-only tables are not supported in serializable transactions.")));			
			}

			if (resultRelInfo->ri_updateDesc == NULL)
			{
				ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);
				resultRelInfo->ri_updateDesc = (AppendOnlyUpdateDesc)
					appendonly_update_init(resultRelationDesc, ActiveSnapshot, resultRelInfo->ri_aosegno);
			}
			result = appendonly_update(resultRelInfo->ri_updateDesc,
								 tuple, (AOTupleId *) tupleid, &aoTupleId);
		}
		else if (rel_is_aocols)
		{
			if (IsXactIsoLevelSerializable)
			{
				ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					errmsg("Updates on append-only tables are not supported in serializable transactions.")));			
			}

			if (resultRelInfo->ri_updateDesc == NULL)
			{
				ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos);
				resultRelInfo->ri_updateDesc = (AppendOnlyUpdateDesc)
					aocs_update_init(resultRelationDesc, resultRelInfo->ri_aosegno);
			}
			result = aocs_update(resultRelInfo->ri_updateDesc,
								 partslot, (AOTupleId *) tupleid, &aoTupleId);
		}
		else
		{
			Assert(!"We should not be here");
		}
		switch (result)
		{
			case HeapTupleSelfUpdated:
				/* already deleted by self; nothing to do */
				return;

			case HeapTupleMayBeUpdated:
				break;

			case HeapTupleUpdated:
				if (IsXactIsoLevelSerializable)
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent update")));
				else if (!ItemPointerEquals(tupleid, &update_ctid))
				{
					TupleTableSlot *epqslot;

					epqslot = EvalPlanQual(estate,
										   resultRelInfo->ri_RangeTableIndex,
										   &update_ctid,
										   update_xmax,
										   estate->es_snapshot->curcid);
					if (!TupIsNull(epqslot))
					{
						*tupleid = update_ctid;
						partslot = ExecFilterJunk(estate->es_junkFilter, epqslot);
						tuple = ExecFetchSlotHeapTuple(partslot);
						goto lreplace;
					}
				}
				/* tuple already deleted; nothing to do */
				return;

			default:
				elog(ERROR, "unrecognized heap_update status: %u", result);
				return;
		}
	}
	else
	{
		HeapTuple persistentTuple;

		/*
		 * Persistent metadata path.
		 */
		persistentTuple = heap_copytuple(tuple);
		persistentTuple->t_self = *tupleid;

		frozen_heap_inplace_update(resultRelationDesc, persistentTuple);

		heap_freetuple(persistentTuple);
	}

	IncrReplaced();
	(estate->es_processed)++;
	(resultRelInfo->ri_aoprocessed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).	All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */
	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	if (rel_is_aorows || rel_is_aocols)
	{
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false);
	}
	else
	{
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(partslot, &(((HeapTuple) tuple)->t_self), estate, false);
	}

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);

}
static Datum
gp_aovisimap_entry_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[4];
	bool		nulls[4];
	HeapTuple tuple;
	Datum result;

	typedef struct Context
	{
		AppendOnlyVisimap visiMap;

		Relation parentRelation;

		IndexScanDesc indexScan;

		text *bitmapBuffer;
	} Context;
	
	FuncCallContext *funcctx;
	Context *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;
		
		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(4, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "first_row_num",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "hidden_tupcount",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "bitmap",
						   TEXTOID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect all the visibility map information that we will format
		 * and send out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->parentRelation = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->parentRelation) || RelationIsAoCols(context->parentRelation)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		AppendOnlyVisimap_Init(&context->visiMap,
				context->parentRelation->rd_appendonly->visimaprelid,
				context->parentRelation->rd_appendonly->visimapidxid,
				AccessShareLock,
				SnapshotNow);

		context->indexScan = AppendOnlyVisimapStore_BeginScan(&
			context->visiMap.visimapStore, 0, NULL);

		context->bitmapBuffer = palloc0(VARHDRSZ + APPENDONLY_VISIMAP_MAX_RANGE + 1);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	if (AppendOnlyVisimapStore_GetNext(&context->visiMap.visimapStore,
				context->indexScan,
				ForwardScanDirection,
				&context->visiMap.visimapEntry,
				NULL))
	{
		AppendOnlyVisimapEntry *visimapEntry = &context->visiMap.visimapEntry;

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = Int32GetDatum(visimapEntry->segmentFileNum);
		values[1] = Int64GetDatum(visimapEntry->firstRowNum);
		values[2] = Int32GetDatum(
				(int32)AppendOnlyVisimapEntry_GetHiddenTupleCount(visimapEntry));
		
		gp_aovisimap_encode_bitmap(VARDATA(context->bitmapBuffer), 
				visimapEntry->bitmap);
		SET_VARSIZE(context->bitmapBuffer, APPENDONLY_VISIMAP_MAX_RANGE);
		values[3] = PointerGetDatum(context->bitmapBuffer);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	
	AppendOnlyVisimapStore_EndScan(&context->visiMap.visimapStore,
			context->indexScan);
	AppendOnlyVisimap_Finish(&context->visiMap, AccessShareLock);
	heap_close(context->parentRelation, AccessShareLock);

	pfree(context->bitmapBuffer);
	pfree(context);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
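/*
 * Illustrative sketch (not part of the original source): the bare
 * set-returning-function skeleton that gp_aovisimap_entry_internal()
 * above follows.  Any state that must survive between calls is
 * allocated in funcctx->multi_call_memory_ctx on the first call and
 * re-attached via SRF_PERCALL_SETUP() on every later call.  This toy
 * version just returns the integers 0 .. n-1 for an int4 argument n;
 * the function name is hypothetical.
 */
#include "postgres.h"
#include "funcapi.h"

PG_FUNCTION_INFO_V1(srf_pattern_sketch);

Datum
srf_pattern_sketch(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldcontext;
		int32		n = PG_GETARG_INT32(0);

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/* cross-call state lives in the multi-call memory context */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
		funcctx->max_calls = (n > 0) ? n : 0;
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();

	if (funcctx->call_cntr < funcctx->max_calls)
		SRF_RETURN_NEXT(funcctx, Int32GetDatum((int32) funcctx->call_cntr));

	SRF_RETURN_DONE(funcctx);
}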
Beispiel #23
/*
 * Assumes that the segment file lock is already held.
 * Assumes that the segment file should be compacted.
 *
 */
static void
AppendOnlySegmentFileFullCompaction(Relation aorel, 
		AppendOnlyEntry *aoEntry, 
		AppendOnlyInsertDesc insertDesc,
		FileSegInfo* fsinfo)
{
	const char* relname;
	AppendOnlyVisimap visiMap;
	AppendOnlyScanDesc scanDesc;
	TupleDesc tupDesc;
	MemTuple		tuple;
	TupleTableSlot	*slot;
	MemTupleBinding *mt_bind;
	int compact_segno;
	int64 movedTupleCount = 0;
	ResultRelInfo *resultRelInfo;
	EState *estate;
	AOTupleId *aoTupleId;
	int64 tupleCount = 0;
	int64 tuplePerPage = INT_MAX;

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationIsAoRows(aorel));
	Assert(insertDesc);

	compact_segno = fsinfo->segno;
	if (fsinfo->varblockcount > 0)
	{
		tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount;
	}
	relname = RelationGetRelationName(aorel);

	AppendOnlyVisimap_Init(&visiMap,
			aoEntry->visimaprelid,
			aoEntry->visimapidxid,
			ShareUpdateExclusiveLock,
			SnapshotNow);

	elogif(Debug_appendonly_print_compaction,
			LOG, "Compact AO segno %d, relation %s, insert segno %d", 
			compact_segno, relname, insertDesc->storageWrite.segmentFileNum);

	/*
	 * Todo: We need to limit the scan to one file and avoid locking the
	 * file again.
	 *
	 * We use SnapshotAny to get both visible and invisible tuples.
	 */
	scanDesc = appendonly_beginrangescan(aorel,
			SnapshotAny, SnapshotNow,
			&compact_segno, 1, 0, NULL);

	tupDesc = RelationGetDescr(aorel);
	slot = MakeSingleTupleTableSlot(tupDesc);
	mt_bind = create_memtuple_binding(tupDesc);

	/*
	 * We need a ResultRelInfo and an EState so we can use the regular
	 * executor's index-entry-making machinery.
	 */
	estate = CreateExecutorState();
	resultRelInfo = makeNode(ResultRelInfo);
	resultRelInfo->ri_RangeTableIndex = 1;	/* dummy */
	resultRelInfo->ri_RelationDesc = aorel;
	resultRelInfo->ri_TrigDesc = NULL;		/* we don't fire triggers */
	ExecOpenIndices(resultRelInfo);
	estate->es_result_relations = resultRelInfo;
	estate->es_num_result_relations = 1;
	estate->es_result_relation_info = resultRelInfo;

	/*
	 * Go through all visible tuples and move them to a new segfile.
	 */
	while ((tuple = appendonly_getnext(scanDesc, ForwardScanDirection, slot)) != NULL)
	{
		/* Check interrupts as this may take time. */
		CHECK_FOR_INTERRUPTS();

		aoTupleId = (AOTupleId*)slot_get_ctid(slot);
		if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId))
		{
			AppendOnlyMoveTuple(tuple,
							slot,
							mt_bind,
							insertDesc,
							resultRelInfo,
							estate);
			movedTupleCount++;
		}
		else
		{
			/* Tuple is invisible and needs to be dropped */
			AppendOnlyThrowAwayTuple(aorel, 
							tuple,
							slot,
							mt_bind);
		}

		/*
		 * Check for a vacuum delay point after approximately one varblock
		 * worth of tuples.
		 */
		tupleCount++;
		if (VacuumCostActive && tupleCount % tuplePerPage == 0)
		{
			vacuum_delay_point();
		}
	}

	SetFileSegInfoState(aorel, aoEntry, compact_segno, AOSEG_STATE_AWAITING_DROP);

	AppendOnlyVisimap_DeleteSegmentFile(&visiMap, compact_segno);

	/* Delete all mini pages of the segment files if block directory exists */
	if (OidIsValid(aoEntry->blkdirrelid))
	{
		AppendOnlyBlockDirectory_DeleteSegmentFile(
			aoEntry,
			SnapshotNow,
			compact_segno,
			0);
	}

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Finished compaction: "
		   "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT,
		   compact_segno, relname, movedTupleCount);

	AppendOnlyVisimap_Finish(&visiMap, NoLock);

	ExecCloseIndices(resultRelInfo);
	FreeExecutorState(estate);

	ExecDropSingleTupleTableSlot(slot);
	destroy_memtuple_binding(mt_bind);

	appendonly_endscan(scanDesc);
}
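/*
 * Illustrative sketch (not part of the original source): the cost-based
 * vacuum throttling idiom used in the compaction loop above.  Calling
 * vacuum_delay_point() roughly once per varblock/page worth of tuples
 * lets the backend nap when the accumulated vacuum cost exceeds the
 * configured limit without adding per-tuple overhead.  The function name
 * and the caller-supplied tuples_per_page estimate are assumptions.
 */
#include "postgres.h"
#include "commands/vacuum.h"
#include "miscadmin.h"

#include <limits.h>

static void
throttled_tuple_loop(int64 ntuples, int64 tuples_per_page)
{
	int64		i;

	if (tuples_per_page <= 0)
		tuples_per_page = INT_MAX;	/* effectively disables the delay checks */

	for (i = 1; i <= ntuples; i++)
	{
		/* Check interrupts as this may take time. */
		CHECK_FOR_INTERRUPTS();

		/* ... process one tuple here ... */

		if (VacuumCostActive && i % tuples_per_page == 0)
			vacuum_delay_point();
	}
}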
Beispiel #24
/*
 * create_aoblkdir_table
 *
 * rel is already opened and exclusive-locked.
 * comptypeOid is InvalidOid.
 */
static bool
create_aoblkdir_table(Relation rel, Oid aoblkdirOid,
					  Oid aoblkdirIndexOid, Oid *comptypeOid)
{
	Oid relOid = RelationGetRelid(rel);
	Oid	aoblkdir_relid;
	Oid	aoblkdir_idxid;
	bool shared_relation = rel->rd_rel->relisshared;
	char aoblkdir_relname[NAMEDATALEN];
	char aoblkdir_idxname[NAMEDATALEN];
	TupleDesc	tupdesc;
	IndexInfo  *indexInfo;
	Oid			classObjectId[3];
	ObjectAddress baseobject;
	ObjectAddress aoblkdirobject;
	Oid			tablespaceOid = ChooseTablespaceForLimitedObject(rel->rd_rel->reltablespace);

	if (!RelationIsAoRows(rel))
		return false;
	
	/*
	 * We cannot allow creating a block directory for a shared relation
	 * after initdb (because there's no way to let other databases know
	 * about this block directory).
	 */
	if (shared_relation && !IsBootstrapProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared tables cannot have block directory after initdb")));

	GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL,NULL, &aoblkdir_relid, &aoblkdir_idxid);

	/*
	 * Does it have a block directory?
	 */
	if (aoblkdir_relid != InvalidOid)
	{
		return false;
	}

	snprintf(aoblkdir_relname, sizeof(aoblkdir_relname),
			 "pg_aoblkdir_%u", relOid);
	snprintf(aoblkdir_idxname, sizeof(aoblkdir_idxname),
			 "pg_aoblkdir_%u_index", relOid);
	
	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(4, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "segno",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "columngroup_no",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "first_row_no",
					   INT8OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 4,
					   "minipage",
					   VARBITOID,
					   -1, 0);
	/*
	 * We don't want any toast columns here.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';
	tupdesc->attrs[3]->attstorage = 'p';

	/*
	 * We place aoblkdir relation in the pg_aoseg namespace
	 * even if its master relation is a temp table. There cannot be
	 * any naming collision, and the aoblkdir relation will be
	 * destroyed when its master is, so there is no need to handle
	 * the aoblkdir relation as temp.
	 */
	aoblkdir_relid = heap_create_with_catalog(aoblkdir_relname,
											  PG_AOSEGMENT_NAMESPACE,
											  tablespaceOid,
											  aoblkdirOid,
											  rel->rd_rel->relowner,
											  tupdesc,
											  /* relam */ InvalidOid,
											  RELKIND_AOBLOCKDIR,
											  RELSTORAGE_HEAP,
											  shared_relation,
											  true,
											  /* bufferPoolBulkLoad */ false,
											  0,
											  ONCOMMIT_NOOP,
											  NULL, /* GP Policy */
											  (Datum) 0,
											  true,
											  comptypeOid,
						 					  /* persistentTid */ NULL,
						 					  /* persistentSerialNum */ NULL);
	
	/* Make this table visible, else index creation will fail */
	CommandCounterIncrement();
	
	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 3;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_KeyAttrNumbers[2] = 3;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = false;
	indexInfo->ii_Concurrent = false;
	
	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT4_BTREE_OPS_OID;
	classObjectId[2] = INT8_BTREE_OPS_OID;

	aoblkdir_idxid = index_create(aoblkdirOid, aoblkdir_idxname, aoblkdirIndexOid,
								  indexInfo,
								  BTREE_AM_OID,
								  tablespaceOid,
								  classObjectId, (Datum) 0,
								  true, false, (Oid *) NULL, true, false, false, NULL);
	
	/* Unlock target table -- no one can see it */
	UnlockRelationOid(aoblkdirOid, ShareLock);
	/* Unlock the index -- no one can see it anyway */
	UnlockRelationOid(aoblkdirIndexOid, AccessExclusiveLock);

	/*
	 * Store the aoblkdir table's OID in the parent relation's pg_appendonly row.
	 */
	UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, InvalidOid,
								 aoblkdir_relid, aoblkdir_idxid);

	/*
	 * Register dependency from the aoblkdir table to the master, so that
	 * the aoblkdir table will be deleted if the master is.
	 */
	baseobject.classId = RelationRelationId;
	baseobject.objectId = relOid;
	baseobject.objectSubId = 0;
	aoblkdirobject.classId = RelationRelationId;
	aoblkdirobject.objectId = aoblkdirOid;
	aoblkdirobject.objectSubId = 0;

	recordDependencyOn(&aoblkdirobject, &baseobject, DEPENDENCY_INTERNAL);

	/*
	 * Make changes visible
	 */
	CommandCounterIncrement();

	return true;
}
Beispiel #25
/*
 * Performs the drop phase of append-only compaction: drops the segment
 * files of an append-only relation that are marked AOSEG_STATE_AWAITING_DROP.
 *
 * In non-utility mode, all compaction segment files should be
 * marked as in-use/in-compaction in the appendonlywriter.c code.
 */
void
AppendOnlyDrop(Relation aorel,
		List* compaction_segno)
{
	const char* relname;
	int total_segfiles;
	FileSegInfo** segfile_array;
	int i, segno;
	FileSegInfo* fsinfo;

	Assert (Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert (RelationIsAoRows(aorel));

	relname = RelationGetRelationName(aorel);
	AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);

	elogif (Debug_appendonly_print_compaction, LOG, 
			"Drop AO relation %s", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllFileSegInfo(aorel, aoEntry, SnapshotNow, &total_segfiles);

	for(i = 0 ; i < total_segfiles ; i++)
	{
		segno = segfile_array[i]->segno;
		if (list_find_int(compaction_segno, segno) < 0)
		{
			continue;
		}

		/*
		 * Try to get the transaction write-lock for the Append-Only segment file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until commit / abort.
		 */
		LockRelationAppendOnlySegmentFile(
												&aorel->rd_node,
												segfile_array[i]->segno,
												AccessExclusiveLock,
												false);

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetFileSegInfo(aorel, aoEntry, SnapshotNow, segno);

		if (fsinfo->state == AOSEG_STATE_AWAITING_DROP)
		{
			Assert(HasLockForSegmentFileDrop(aorel));
			Assert(!HasSerializableBackends(false));
			AppendOnlyCompaction_DropSegmentFile(aorel, segno);
			ClearFileSegInfo(aorel, aoEntry, segno,
					AOSEG_STATE_DEFAULT);
		}
		pfree(fsinfo);
	}

	pfree(aoEntry);

	if (segfile_array)
	{
		FreeAllSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
}
Beispiel #26
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 *
 *		The return value indicates whether this function has held off
 *		interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
 */
bool
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	bool		heldoff = false;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	if (Gp_role == GP_ROLE_DISPATCH)
		elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeSegmentFileDropPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeCleanupPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to correct value
	 * for an appendonly (AO/CO) table.
	 */
	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return false;
	}

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* heap relation */

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 *
	 * Note that after we've truncated the heap, it's too late to abort the
	 * transaction; doing so would lose the sinval messages needed to tell
	 * the other backends about the table being shrunk.  We prevent interrupts
	 * in that case; caller is responsible for re-enabling them after
	 * committing the transaction.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
	{
		HOLD_INTERRUPTS();
		heldoff = true;
		lazy_truncate_heap(onerel, vacrelstats);
	}

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	if (vacrelstats->tot_free_pages > MaxFSMPages)
		ereport(WARNING,
				(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel)),
				 /* Only suggest VACUUM FULL if > 20% free */
				 (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
				 errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
				 errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));

	/* Update statistics in pg_class */
	vac_update_relstats_from_list(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze, vacrelstats->rel_tuples);

	if (gp_indexcheck_vacuum == INDEX_CHECK_ALL ||
		(gp_indexcheck_vacuum == INDEX_CHECK_SYSTEM &&
		 PG_CATALOG_NAMESPACE == RelationGetNamespace(onerel)))
	{
		int			i;

		for (i = 0; i < nindexes; i++)
		{
			if (Irel[i]->rd_rel->relam == BTREE_AM_OID)
				_bt_validate_vacuum(Irel[i], onerel, OldestXmin);
		}
	}

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
						  vacrelstats->pages_removed, vacrelstats->rel_pages,
						vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}

	return heldoff;
}
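/*
 * Illustrative sketch (not part of the original source): how a caller is
 * expected to honour the "heldoff" return value documented above.  If the
 * heap was truncated, lazy_vacuum_rel() returns with interrupts held off
 * (HOLD_INTERRUPTS() above); the caller must commit first and only then
 * call RESUME_INTERRUPTS().  The helper name and its argument list are
 * hypothetical.
 */
#include "postgres.h"
#include "access/xact.h"
#include "commands/vacuum.h"
#include "miscadmin.h"

static void
vacuum_one_rel_sketch(Relation onerel, VacuumStmt *vacstmt,
					  BufferAccessStrategy bstrategy, List *updated_stats)
{
	bool		heldoff;

	heldoff = lazy_vacuum_rel(onerel, vacstmt, bstrategy, updated_stats);

	/* commit while interrupts may still be held off ... */
	CommitTransactionCommand();

	/* ... and only then allow them again */
	if (heldoff)
		RESUME_INTERRUPTS();
}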
Beispiel #27
/*
 * lazy_vacuum_aorel -- perform LAZY VACUUM for one Append-only relation.
 */
static void
lazy_vacuum_aorel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats)
{
	LVRelStats *vacrelstats;
	bool		update_relstats = true;

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	if (vacuumStatement_IsInAppendOnlyPreparePhase(vacstmt))
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum prepare phase %s", RelationGetRelationName(onerel));

		vacuum_appendonly_indexes(onerel, vacstmt, updated_stats);
		if (RelationIsAoRows(onerel))
			AppendOnlyTruncateToEOF(onerel);
		else
			AOCSTruncateToEOF(onerel);

		/*
		 * MPP-23647.  For empty tables, we skip compaction phase
		 * and cleanup phase.  Therefore, we update the stats
		 * (specifically, relfrozenxid) in prepare phase if the
		 * table is empty.  Otherwise, the stats will be updated in
		 * the cleanup phase, when we would have computed the
		 * correct values for stats.
		 */
		if (vacstmt->appendonly_relation_empty)
		{
			update_relstats = true;
			/*
			 * For an empty relation, the only stats we care about
			 * is relfrozenxid and relhasindex.  We need to be
			 * mindful of correctly setting relhasindex here.
			 * relfrozenxid is already taken care of above by
			 * calling vacuum_set_xid_limits().
			 */
			vacrelstats->hasindex = onerel->rd_rel->relhasindex;
		}
		else
		{
			/*
			 * For a non-empty relation, follow the usual
			 * compaction phases and do not update stats in
			 * prepare phase.
			 */
			update_relstats = false;
		}
	}
	else if (!vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
		vacuum_appendonly_rel(onerel, vacstmt);
		update_relstats = false;
	}
	else
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum cleanup phase %s", RelationGetRelationName(onerel));

		vacuum_appendonly_fill_stats(onerel, ActiveSnapshot,
									 &vacrelstats->rel_pages,
									 &vacrelstats->rel_tuples,
									 &vacrelstats->hasindex);
		/* reset the remaining LVRelStats values */
		vacrelstats->nonempty_pages = 0;
		vacrelstats->num_dead_tuples = 0;
		vacrelstats->max_dead_tuples = 0;
		vacrelstats->tuples_deleted = 0;
		vacrelstats->tot_free_pages = 0;
		vacrelstats->fs_is_heap = false;
		vacrelstats->num_free_pages = 0;
		vacrelstats->max_free_pages = 0;
		vacrelstats->pages_removed = 0;
	}

	if (update_relstats)
	{
		/* Update statistics in pg_class */
		vac_update_relstats_from_list(onerel,
							vacrelstats->rel_pages,
							vacrelstats->rel_tuples,
							vacrelstats->hasindex,
							FreezeLimit,
							updated_stats);

		/* report results to the stats collector, too */
		pgstat_report_vacuum(RelationGetRelid(onerel),
							 onerel->rd_rel->relisshared,
							 true /*vacrelstats->scanned_all*/,
							 vacstmt->analyze, vacrelstats->rel_tuples);
	}
}
Beispiel #28
/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs have to add the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 *		Insert can be part of an update operation when
 *		there is a preceding SplitUpdate node. 
 * ----------------------------------------------------------------
 */
void
ExecInsert(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate,
		   PlanGenerator planGen,
		   bool isUpdate)
{
	void		*tuple = NULL;
	ResultRelInfo *resultRelInfo = NULL;
	Relation	resultRelationDesc = NULL;
	Oid			newId = InvalidOid;
	TupleTableSlot *partslot = NULL;

	AOTupleId	aoTupleId = AOTUPLEID_INIT;

	bool		rel_is_heap = false;
	bool 		rel_is_aorows = false;
	bool		rel_is_external = false;
	bool		rel_is_parquet = false;

	/*
	 * get information on the (current) result relation
	 */
	if (estate->es_result_partitions)
	{
		resultRelInfo = slot_get_partition(slot, estate);
		estate->es_result_relation_info = resultRelInfo;

		if (NULL != resultRelInfo->ri_parquetSendBack)
		{
			/*
			 * The Parquet part we are about to insert into
			 * has sendBack information. This means we're inserting into the
			 * part twice, which is not supported. Error out (GPSQL-2291)
			 */
			Assert(gp_parquet_insert_sort);
			ereport(ERROR, (errcode(ERRCODE_CDB_FEATURE_NOT_YET),
					errmsg("Cannot insert out-of-order tuples in parquet partitions"),
					errhint("Sort the data on the partitioning key(s) before inserting"),
					errOmitLocation(true)));
		}

		/*
		 * Check if we need to close the last parquet partition we
		 * inserted into (GPSQL-2291).
		 */
		Oid new_part_oid = resultRelInfo->ri_RelationDesc->rd_id;
		if (gp_parquet_insert_sort &&
				PLANGEN_OPTIMIZER == planGen &&
				InvalidOid != estate->es_last_parq_part &&
				new_part_oid != estate->es_last_parq_part)
		{

			Assert(NULL != estate->es_partition_state->result_partition_hash);

			ResultPartHashEntry *entry = hash_search(estate->es_partition_state->result_partition_hash,
									&estate->es_last_parq_part,
									HASH_FIND,
									NULL /* found */);

			Assert(NULL != entry);
			Assert(entry->offset < estate->es_num_result_relations);

			ResultRelInfo *oldResultRelInfo = & estate->es_result_relations[entry->offset];

			elog(DEBUG1, "Switching from old part oid=%d name=[%s] to new part oid=%d name=[%s]",
					estate->es_last_parq_part,
					oldResultRelInfo->ri_RelationDesc->rd_rel->relname.data,
					new_part_oid,
					resultRelInfo->ri_RelationDesc->rd_rel->relname.data);

			/*
			 * We are opening a new partition, and the last partition we
			 * inserted into was a Parquet part. Let's close the old
			 * parquet insert descriptor to free the memory before
			 * opening the new one.
			 */
			ParquetInsertDescData *oldInsertDesc = oldResultRelInfo->ri_parquetInsertDesc;

			/*
			 * We need to preserve the "sendback" information that needs to be
			 * sent back to the QD process from this part.
			 * Compute it here, and store it for later use.
			 */
			QueryContextDispatchingSendBack sendback =
					CreateQueryContextDispatchingSendBack(1);
			sendback->relid = RelationGetRelid(oldResultRelInfo->ri_RelationDesc);
			oldInsertDesc->sendback = sendback;
			parquet_insert_finish(oldInsertDesc);

			/* Store the sendback information in the resultRelInfo for this part */
			oldResultRelInfo->ri_parquetSendBack = sendback;

			/* Record in the resultRelInfo that we closed the parquet insert descriptor */
			oldResultRelInfo->ri_parquetInsertDesc = NULL;

			/* Reset the last parquet part Oid, it's now closed */
			estate->es_last_parq_part = InvalidOid;
		}
	}
	else
	{
		resultRelInfo = estate->es_result_relation_info;
	}

	Assert (!resultRelInfo->ri_projectReturning);

	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	rel_is_heap = RelationIsHeap(resultRelationDesc);
	rel_is_aorows = RelationIsAoRows(resultRelationDesc);
	rel_is_external = RelationIsExternal(resultRelationDesc);
	rel_is_parquet = RelationIsParquet(resultRelationDesc);

	/* Validate that the insert is not part of a disallowed update operation. */
	if (isUpdate && (rel_is_aorows || rel_is_parquet))
	{
		ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				errmsg("Append-only tables are not updatable. Operation not permitted."),
				errOmitLocation(true)));
	}

	partslot = reconstructMatchingTupleSlot(slot, resultRelInfo);
	if (rel_is_heap || rel_is_external)
	{
		tuple = ExecFetchSlotHeapTuple(partslot);
	}
	else if (rel_is_aorows)
	{
		tuple = ExecFetchSlotMemTuple(partslot, false);
	}
	else if (rel_is_parquet)
	{
		tuple = NULL;
	}

	Assert( partslot != NULL );
	Assert( rel_is_parquet || (tuple != NULL));

	/* Execute triggers in Planner-generated plans */
	if (planGen == PLANGEN_PLANNER)
	{
		/* BEFORE ROW INSERT Triggers */
		if (resultRelInfo->ri_TrigDesc &&
			resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
		{
			HeapTuple	newtuple;

			/* NYI */
			if(rel_is_parquet)
				elog(ERROR, "triggers are not supported on tables that use column-oriented storage");

			newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

			if (newtuple == NULL)	/* "do nothing" */
			{
				return;
			}

			if (newtuple != tuple)	/* modified by Trigger(s) */
			{
				/*
				 * Put the modified tuple into a slot for convenience of routines
				 * below.  We assume the tuple was allocated in per-tuple memory
				 * context, and therefore will go away by itself. The tuple table
				 * slot should not try to clear it.
				 */
				TupleTableSlot *newslot = estate->es_trig_tuple_slot;

				if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor)
					ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor);
				ExecStoreGenericTuple(newtuple, newslot, false);
				newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */
				tuple = newtuple;
				partslot = newslot;
			}
		}
	}
	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr &&
			planGen == PLANGEN_PLANNER)
	{
		ExecConstraints(resultRelInfo, partslot, estate);
	}
	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * NOTE: for append-only relations we use the append-only access methods.
	 */
	if (rel_is_aorows)
	{
		if (resultRelInfo->ri_aoInsertDesc == NULL)
		{
			ResultRelSegFileInfo *segfileinfo = NULL;
			/* Set the pre-assigned fileseg number to insert into */
			ResultRelInfoSetSegFileInfo(resultRelInfo, estate->es_result_segfileinfos);
			segfileinfo = (ResultRelSegFileInfo *)list_nth(resultRelInfo->ri_aosegfileinfos, GetQEIndex());
			resultRelInfo->ri_aoInsertDesc =
				appendonly_insert_init(resultRelationDesc,
									   segfileinfo);

		}

		appendonly_insert(resultRelInfo->ri_aoInsertDesc, tuple, &newId, &aoTupleId);
	}
	else if (rel_is_external)
	{
		/* Writable external table */
		if (resultRelInfo->ri_extInsertDesc == NULL)
			resultRelInfo->ri_extInsertDesc = external_insert_init(
					resultRelationDesc, 0);

		newId = external_insert(resultRelInfo->ri_extInsertDesc, tuple);
	}
	else if (rel_is_parquet)
	{
		/* If there is no parquet insert descriptor, create it now. */
		if (resultRelInfo->ri_parquetInsertDesc == NULL)
		{
			ResultRelSegFileInfo *segfileinfo = NULL;
			ResultRelInfoSetSegFileInfo(resultRelInfo, estate->es_result_segfileinfos);
			segfileinfo = (ResultRelSegFileInfo *)list_nth(resultRelInfo->ri_aosegfileinfos, GetQEIndex());
			resultRelInfo->ri_parquetInsertDesc = parquet_insert_init(resultRelationDesc, segfileinfo);

			/*
			 * Just opened a new parquet partition for insert. Save the Oid
			 * in estate, so that we can close it when switching to a
			 * new partition (GPSQL-2291)
			 */
			elog(DEBUG1, "Saving es_last_parq_part. Old=%d, new=%d", estate->es_last_parq_part, resultRelationDesc->rd_id);
			estate->es_last_parq_part = resultRelationDesc->rd_id;
		}

		newId = parquet_insert(resultRelInfo->ri_parquetInsertDesc, partslot);
	}
	else
	{
		Insist(rel_is_heap);

		newId = heap_insert(resultRelationDesc,
							tuple,
							estate->es_snapshot->curcid,
							true, true, GetCurrentTransactionId());
	}

	IncrAppended();
	(estate->es_processed)++;
	(resultRelInfo->ri_aoprocessed)++;
	estate->es_lastoid = newId;

	partslot->tts_tableOid = RelationGetRelid(resultRelationDesc);

	if (rel_is_aorows || rel_is_parquet)
	{

		/* NOTE: The current version does not support indexes on parquet tables. */
		/*
		 * insert index entries for the AO row-store tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0 && !rel_is_parquet)
			ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false);
	}
	else
	{
		/* Use partslot for the index update in case this is an indexed heap table. */
		TupleTableSlot *xslot = partslot;
		void *xtuple = tuple;

		setLastTid(&(((HeapTuple) xtuple)->t_self));

		/*
		 * insert index entries for tuple
		 */
		if (resultRelInfo->ri_NumIndices > 0)
			ExecInsertIndexTuples(xslot, &(((HeapTuple) xtuple)->t_self), estate, false);

	}

	if (planGen == PLANGEN_PLANNER)
	{
		/* AFTER ROW INSERT Triggers */
		ExecARInsertTriggers(estate, resultRelInfo, tuple);
	}
}
Beispiel #29
void
AlterTableCreateAoSegTable(Oid relOid, bool is_part_child, bool is_part_parent)
{
	TupleDesc	tupdesc;
	Relation	rel;
	const char *prefix;

	/*
	 * Grab an exclusive lock on the target table, which we will NOT release
	 * until end of transaction.  (This is probably redundant in all present
	 * uses...)
	 */
	if (is_part_child)
		rel = heap_open(relOid, NoLock);
	else
		rel = heap_open(relOid, AccessExclusiveLock);

	if(RelationIsAoRows(rel))
	{
		prefix = "pg_aoseg";

		/* this is pretty painful...  need a tuple descriptor */
		tupdesc = CreateTemplateTupleDesc(8, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1,
						"segno",
						INT4OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2,
						"eof",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3,
						"tupcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4,
						"varblockcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5,
						"eofuncompressed",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6,
						"modcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 7,
						"formatversion",
						INT2OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 8,
						"state",
						INT2OID,
						-1, 0);

	}
	else if (RelationIsAoCols(rel))
	{
		prefix = "pg_aocsseg";

		/*
		 * XXX
		 * At the moment we hardwire the relation's AOCS info.
		 * Essentially, we assume a complete vertical partition and
		 * do no datatype-specific compression.
		 *
		 * To make this right, we first need to fix DefineRelation
		 * so that the per-column info is stored, and then open the
		 * catalog and pull that info out here.
		 */

		/*
		 * XXX We do not handle add/drop column etc nicely yet.
		 */

		/*
		 * Assuming full vertical partitioning, we want to include
		 * the following in the seg table:
		 *
		 * segno int,               -- segment file number, as used by AO
		 * tupcount bigint          -- total tuples
		 * varblockcount bigint,    -- total varblocks
		 * vpinfo varbinary(max)    -- vertical partition info encoded in
		 *                             binary. NEEDS TO BE REFACTORED
		 *                             INTO MULTIPLE COLUMNS!!
		 * state (smallint)         -- state of the segment file
		 */

		tupdesc = CreateTemplateTupleDesc(7, false);

		TupleDescInitEntry(tupdesc, (AttrNumber) 1,
						   "segno",
						   INT4OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2,
						   "tupcount",
						   INT8OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3,
						   "varblockcount",
						   INT8OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4,
						   "vpinfo",
						   BYTEAOID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5,
						"modcount",
						INT8OID,
						-1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6,
						   "formatversion",
						   INT2OID,
						   -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 7,
						   "state",
						   INT2OID,
						   -1, 0);
	}
	else
	{
		heap_close(rel, NoLock);
		return;
	}

	(void) CreateAOAuxiliaryTable(rel, prefix, RELKIND_AOSEGMENTS,
								  tupdesc,
								  NULL, NIL, NULL, NULL, is_part_parent);

	heap_close(rel, NoLock);
}
Beispiel #30
/*
 * Truncates each segment file of the AO relation to its EOF.
 * If we cannot get a lock on a segment file (because of, e.g., a concurrent
 * insert), that segment file is skipped.
 */
void
AppendOnlyTruncateToEOF(Relation aorel)
{
	const char* relname;
	int total_segfiles;
	FileSegInfo** segfile_array;
	int i, segno;
	LockAcquireResult acquireResult;
	FileSegInfo* fsinfo;

	Assert (RelationIsAoRows(aorel));

	relname = RelationGetRelationName(aorel);
	AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(RelationGetRelid(aorel), SnapshotNow);

	elogif (Debug_appendonly_print_compaction, LOG,
			"Truncate AO relation %s to EOF", relname);

	/* Get information about all the file segments we need to scan */
	segfile_array = GetAllFileSegInfo(aorel, aoEntry, SnapshotNow, &total_segfiles);

	for(i = 0 ; i < total_segfiles ; i++)
	{
		segno = segfile_array[i]->segno;

		/*
		 * Try to get the transaction write-lock for the Append-Only segment file.
		 *
		 * NOTE: This is a transaction scope lock that must be held until commit / abort.
		 */
		acquireResult = LockRelationAppendOnlySegmentFile(
												&aorel->rd_node,
												segfile_array[i]->segno,
												AccessExclusiveLock,
												/* dontWait */ true);
		if (acquireResult == LOCKACQUIRE_NOT_AVAIL)
		{
			elog(DEBUG5, "truncate skips AO segfile %d, "
					 "relation %s", segfile_array[i]->segno, relname);
			continue;
		}

		/* Re-fetch under the write lock to get latest committed eof. */
		fsinfo = GetFileSegInfo(aorel, aoEntry, SnapshotNow, segno);

		/*
		 * This should not occur since this segfile info was found by the
		 * "all" method, but it is better to catch it here for troubleshooting
		 * (possibly index corruption?).
		 */
		if (fsinfo == NULL)
			elog(ERROR, "file seginfo for AO relation %s %u/%u/%u (segno=%u) is missing",
				 relname,
				 aorel->rd_node.spcNode,
				 aorel->rd_node.dbNode,
				 aorel->rd_node.relNode,
				 segno);

		AppendOnlySegmentFileTruncateToEOF(aorel, fsinfo);
		pfree(fsinfo);
	}

	pfree(aoEntry);

	if (segfile_array)
	{
		FreeAllSegFileInfo(segfile_array, total_segfiles);
		pfree(segfile_array);
	}
}
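/*
 * Illustrative sketch (not part of the original source): the same
 * "try the lock, skip if busy" idiom used above, expressed with the
 * generic ConditionalLockRelationOid()/UnlockRelationOid() lmgr calls
 * on a whole relation instead of a single AO segment file.  The helper
 * name is hypothetical.
 */
#include "postgres.h"
#include "storage/lmgr.h"

static void
maintain_relation_if_idle(Oid relid)
{
	/* Return immediately instead of blocking if someone else holds the lock. */
	if (!ConditionalLockRelationOid(relid, AccessExclusiveLock))
	{
		elog(DEBUG5, "relation %u is busy, skipping", relid);
		return;
	}

	/* ... perform the maintenance work on the relation here ... */

	/* transaction-scope callers would keep the lock until commit instead */
	UnlockRelationOid(relid, AccessExclusiveLock);
}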