Example #1
StringInfo rest_call_with_lock(char *method, char *url, char *params, StringInfo postData, int64 mutex, bool shared, bool allowCancel) {
    CURL *curl;
    struct curl_slist *headers = NULL;
    char *errorbuff;

    StringInfo response = makeStringInfo();
    CURLcode ret;
    int64 response_code;

    errorbuff = (char *) palloc0(CURL_ERROR_SIZE);
    curl = curl_easy_init();

    if (curl) {
        headers = curl_slist_append(headers, "Transfer-Encoding:");
        headers = curl_slist_append(headers, "Expect:");

        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
        curl_easy_setopt(curl, CURLOPT_FORBID_REUSE, 0L);   /* allow connections to be reused */
        if (allowCancel)
        {
            curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0);      /* we want progress ... */
            curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, curl_progress_func);   /* ... so we can detect a ^C within postgres */
        }
        curl_easy_setopt(curl, CURLOPT_USERAGENT, "zombodb for PostgreSQL");
        curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 0);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write_func);
        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
        curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errorbuff);
        curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 60 * 60L);  /* timeout of 60 minutes */

        curl_easy_setopt(curl, CURLOPT_URL, url);
        curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, response);
        curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, postData ? postData->len : 0);
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postData ? postData->data : NULL);
        curl_easy_setopt(curl, CURLOPT_POST, (strcmp(method, "POST") == 0) || (strcmp(method, "GET") != 0 && postData && postData->data) ? 1 : 0);
    } else {
        elog(IsTransactionState() ? ERROR : WARNING, "Unable to initialize libcurl");
    }

//	if (mutex != 0)
//	{
//		if (shared) DirectFunctionCall1(pg_advisory_lock_shared_int8, Int64GetDatum(mutex));
//		else DirectFunctionCall1(pg_advisory_lock_int8, Int64GetDatum(mutex));
//	}

    ret = curl_easy_perform(curl);

//	if (mutex != 0)
//	{
//		if (shared) DirectFunctionCall1(pg_advisory_unlock_shared_int8, Int64GetDatum(mutex));
//		else DirectFunctionCall1(pg_advisory_unlock_int8, Int64GetDatum(mutex));
//	}

    if (allowCancel && IsTransactionState() && InterruptPending) {
        /* we might have detected one in the progress function, so check for sure */
        CHECK_FOR_INTERRUPTS();
    }

    if (ret != 0) {
        /* curl messed up */
        elog(IsTransactionState() ? ERROR : WARNING, "libcurl error-code: %s(%d); message: %s; req=-X%s %s ", curl_easy_strerror(ret), ret, errorbuff, method, url);
    }

    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);
    if (response_code < 200 || (response_code >= 300 && response_code != 404)) {
        text *errorText = DatumGetTextP(DirectFunctionCall2(json_object_field_text, CStringGetTextDatum(response->data), CStringGetTextDatum("error")));

        elog(IsTransactionState() ? ERROR : WARNING, "rc=%ld; %s", response_code, errorText != NULL ? TextDatumGetCString(errorText) : response->data);
    }

    if (headers)
    	curl_slist_free_all(headers);
    curl_easy_cleanup(curl);
    pfree(errorbuff);

    return response;
}
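A minimal call-site sketch, not part of the original file: the function name ping_cluster, the URL, and the option values below are illustrative assumptions showing how rest_call_with_lock might be driven for a plain GET with no advisory lock.

/* Hypothetical usage sketch: plain GET, no POST body, no advisory lock
 * (mutex = 0), cancellation allowed.  URL is illustrative only. */
static void
ping_cluster(void)
{
    StringInfo response = rest_call_with_lock("GET",
                                              "http://localhost:9200/_cluster/health",
                                              NULL,   /* params */
                                              NULL,   /* postData */
                                              0,      /* mutex: no lock */
                                              false,  /* shared */
                                              true);  /* allowCancel */

    elog(DEBUG1, "cluster health: %s", response->data);
    pfree(response->data);
    pfree(response);
}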
Example #2
/*
 * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
 */
TableSampleDesc *
tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
{
	FunctionCallInfoData fcinfo;
	int			i;
	List	   *args = tablesample->args;
	ListCell   *arg;
	ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
	TableSampleDesc	*tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));

	/* Load functions */
	fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
	fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
	fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
	if (OidIsValid(tablesample->tsmexaminetuple))
		fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
	else
		tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
	fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
	fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));

	InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
							 list_length(args) + 2,
							 InvalidOid, NULL, NULL);

	tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
	tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;

	/* First argument for init function is always TableSampleDesc */
	fcinfo.arg[0] = PointerGetDatum(tsdesc);
	fcinfo.argnull[0] = false;

	/*
	 * Second arg for init function is always REPEATABLE
	 * When tablesample->repeatable is NULL then REPEATABLE clause was not
	 * specified.
	 * When specified, the expression cannot evaluate to NULL.
	 */
	if (tablesample->repeatable)
	{
		ExprState  *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
											(PlanState *) scanstate);
		fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
									 &fcinfo.argnull[1], NULL);
		if (fcinfo.argnull[1])
			ereport(ERROR,
					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
					 errmsg("REPEATABLE clause must be NOT NULL numeric value")));
	}
	else
	{
		fcinfo.arg[1] = UInt32GetDatum(random());
		fcinfo.argnull[1] = false;
	}

	/* Rest of the arguments come from user.  */
	i = 2;
	foreach(arg, args)
	{
		Expr	   *argexpr = (Expr *) lfirst(arg);
		ExprState  *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);

		if (argstate == NULL)
		{
			fcinfo.argnull[i] = true;
			fcinfo.arg[i] = (Datum) 0;
		}
		else
			fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
										 &fcinfo.argnull[i], NULL);
		i++;
	}

	Assert(i == fcinfo.nargs);

	(void) FunctionCallInvoke(&fcinfo);

	return tsdesc;
}
Example #3
/* ----------------------------------------------------------------
 *		ExecInitGatherMerge
 * ----------------------------------------------------------------
 */
GatherMergeState *
ExecInitGatherMerge(GatherMerge *node, EState *estate, int eflags)
{
	GatherMergeState *gm_state;
	Plan	   *outerNode;
	TupleDesc	tupDesc;

	/* Gather merge node doesn't have innerPlan node. */
	Assert(innerPlan(node) == NULL);

	/*
	 * create state structure
	 */
	gm_state = makeNode(GatherMergeState);
	gm_state->ps.plan = (Plan *) node;
	gm_state->ps.state = estate;
	gm_state->ps.ExecProcNode = ExecGatherMerge;

	gm_state->initialized = false;
	gm_state->gm_initialized = false;
	gm_state->tuples_needed = -1;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &gm_state->ps);

	/*
	 * GatherMerge doesn't support checking a qual (it's always more efficient
	 * to do it in the child node).
	 */
	Assert(!node->plan.qual);

	/*
	 * now initialize outer plan
	 */
	outerNode = outerPlan(node);
	outerPlanState(gm_state) = ExecInitNode(outerNode, estate, eflags);

	/*
	 * Leader may access ExecProcNode result directly (if
	 * need_to_scan_locally), or from workers via tuple queue.  So we can't
	 * trivially rely on the slot type being fixed for expressions evaluated
	 * within this node.
	 */
	gm_state->ps.outeropsset = true;
	gm_state->ps.outeropsfixed = false;

	/*
	 * Store the tuple descriptor into gather merge state, so we can use it
	 * while initializing the gather merge slots.
	 */
	tupDesc = ExecGetResultType(outerPlanState(gm_state));
	gm_state->tupDesc = tupDesc;

	/*
	 * Initialize result type and projection.
	 */
	ExecInitResultTypeTL(&gm_state->ps);
	ExecConditionalAssignProjectionInfo(&gm_state->ps, tupDesc, OUTER_VAR);

	/*
	 * Without projections result slot type is not trivially known, see
	 * comment above.
	 */
	if (gm_state->ps.ps_ProjInfo == NULL)
	{
		gm_state->ps.resultopsset = true;
		gm_state->ps.resultopsfixed = false;
	}

	/*
	 * initialize sort-key information
	 */
	if (node->numCols)
	{
		int			i;

		gm_state->gm_nkeys = node->numCols;
		gm_state->gm_sortkeys =
			palloc0(sizeof(SortSupportData) * node->numCols);

		for (i = 0; i < node->numCols; i++)
		{
			SortSupport sortKey = gm_state->gm_sortkeys + i;

			sortKey->ssup_cxt = CurrentMemoryContext;
			sortKey->ssup_collation = node->collations[i];
			sortKey->ssup_nulls_first = node->nullsFirst[i];
			sortKey->ssup_attno = node->sortColIdx[i];

			/*
			 * We don't perform abbreviated key conversion here, for the same
			 * reasons that it isn't used in MergeAppend
			 */
			sortKey->abbreviate = false;

			PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey);
		}
	}

	/* Now allocate the workspace for gather merge */
	gather_merge_setup(gm_state);

	return gm_state;
}
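As a sketch of how the gm_sortkeys array prepared above is meant to be consumed (an illustration, not code from this file; the helper name is hypothetical), the standard sortsupport comparator can be applied to a pair of leading-key datums:

/* Illustrative only: compare two datums of the first sort key using the
 * SortSupport slot initialized above. */
static int
gm_compare_first_key(Datum a, bool a_isnull, Datum b, bool b_isnull,
					 GatherMergeState *gm_state)
{
	return ApplySortComparator(a, a_isnull, b, b_isnull,
							   &gm_state->gm_sortkeys[0]);
}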
Example #4
Datum
lquery_in(PG_FUNCTION_ARGS)
{
	char	   *buf = (char *) PG_GETARG_POINTER(0);
	char	   *ptr;
	int			num = 0,
				totallen = 0,
				numOR = 0;
	int			state = LQPRS_WAITLEVEL;
	lquery	   *result;
	nodeitem   *lptr = NULL;
	lquery_level *cur,
			   *curqlevel,
			   *tmpql;
	lquery_variant *lrptr = NULL;
	bool		hasnot = false;
	bool		wasbad = false;
	int			charlen;
	int			pos = 0;

	ptr = buf;
	while (*ptr)
	{
		charlen = pg_mblen(ptr);

		if (charlen == 1)
		{
			if (t_iseq(ptr, '.'))
				num++;
			else if (t_iseq(ptr, '|'))
				numOR++;
		}

		ptr += charlen;
	}

	num++;
	if (num > MaxAllocSize / ITEMSIZE)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("number of levels (%d) exceeds the maximum allowed (%d)",
						num, (int) (MaxAllocSize / ITEMSIZE))));
	curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
	ptr = buf;
	while (*ptr)
	{
		charlen = pg_mblen(ptr);

		if (state == LQPRS_WAITLEVEL)
		{
			if (ISALNUM(ptr))
			{
				GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
				lptr->start = ptr;
				state = LQPRS_WAITDELIM;
				curqlevel->numvar = 1;
			}
			else if (charlen == 1 && t_iseq(ptr, '!'))
			{
				GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
				lptr->start = ptr + 1;
				state = LQPRS_WAITDELIM;
				curqlevel->numvar = 1;
				curqlevel->flag |= LQL_NOT;
				hasnot = true;
			}
			else if (charlen == 1 && t_iseq(ptr, '*'))
				state = LQPRS_WAITOPEN;
			else
				UNCHAR;
		}
		else if (state == LQPRS_WAITVAR)
		{
			if (ISALNUM(ptr))
			{
				lptr++;
				lptr->start = ptr;
				state = LQPRS_WAITDELIM;
				curqlevel->numvar++;
			}
			else
				UNCHAR;
		}
		else if (state == LQPRS_WAITDELIM)
		{
			if (charlen == 1 && t_iseq(ptr, '@'))
			{
				if (lptr->start == ptr)
					UNCHAR;
				lptr->flag |= LVAR_INCASE;
				curqlevel->flag |= LVAR_INCASE;
			}
			else if (charlen == 1 && t_iseq(ptr, '*'))
			{
				if (lptr->start == ptr)
					UNCHAR;
				lptr->flag |= LVAR_ANYEND;
				curqlevel->flag |= LVAR_ANYEND;
			}
			else if (charlen == 1 && t_iseq(ptr, '%'))
			{
				if (lptr->start == ptr)
					UNCHAR;
				lptr->flag |= LVAR_SUBLEXEME;
				curqlevel->flag |= LVAR_SUBLEXEME;
			}
			else if (charlen == 1 && t_iseq(ptr, '|'))
			{
				lptr->len = ptr - lptr->start -
					((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
					((lptr->flag & LVAR_INCASE) ? 1 : 0) -
					((lptr->flag & LVAR_ANYEND) ? 1 : 0);
				if (lptr->wlen > 255)
					ereport(ERROR,
							(errcode(ERRCODE_NAME_TOO_LONG),
							 errmsg("name of level is too long"),
							 errdetail("Name length is %d, must "
									   "be < 256, in position %d.",
									   lptr->wlen, pos)));

				state = LQPRS_WAITVAR;
			}
			else if (charlen == 1 && t_iseq(ptr, '.'))
			{
				lptr->len = ptr - lptr->start -
					((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
					((lptr->flag & LVAR_INCASE) ? 1 : 0) -
					((lptr->flag & LVAR_ANYEND) ? 1 : 0);
				if (lptr->wlen > 255)
					ereport(ERROR,
							(errcode(ERRCODE_NAME_TOO_LONG),
							 errmsg("name of level is too long"),
							 errdetail("Name length is %d, must "
									   "be < 256, in position %d.",
									   lptr->wlen, pos)));

				state = LQPRS_WAITLEVEL;
				curqlevel = NEXTLEV(curqlevel);
			}
			else if (ISALNUM(ptr))
			{
				if (lptr->flag)
					UNCHAR;
			}
			else
				UNCHAR;
		}
		else if (state == LQPRS_WAITOPEN)
		{
			if (charlen == 1 && t_iseq(ptr, '{'))
				state = LQPRS_WAITFNUM;
			else if (charlen == 1 && t_iseq(ptr, '.'))
			{
				curqlevel->low = 0;
				curqlevel->high = 0xffff;
				curqlevel = NEXTLEV(curqlevel);
				state = LQPRS_WAITLEVEL;
			}
			else
				UNCHAR;
		}
		else if (state == LQPRS_WAITFNUM)
		{
			if (charlen == 1 && t_iseq(ptr, ','))
				state = LQPRS_WAITSNUM;
			else if (t_isdigit(ptr))
			{
				curqlevel->low = atoi(ptr);
				state = LQPRS_WAITND;
			}
			else
				UNCHAR;
		}
		else if (state == LQPRS_WAITSNUM)
		{
			if (t_isdigit(ptr))
			{
				curqlevel->high = atoi(ptr);
				state = LQPRS_WAITCLOSE;
			}
			else if (charlen == 1 && t_iseq(ptr, '}'))
			{
				curqlevel->high = 0xffff;
				state = LQPRS_WAITEND;
			}
			else
				UNCHAR;
		}
		else if (state == LQPRS_WAITCLOSE)
		{
			if (charlen == 1 && t_iseq(ptr, '}'))
				state = LQPRS_WAITEND;
			else if (!t_isdigit(ptr))
				UNCHAR;
		}
		else if (state == LQPRS_WAITND)
		{
			if (charlen == 1 && t_iseq(ptr, '}'))
			{
				curqlevel->high = curqlevel->low;
				state = LQPRS_WAITEND;
			}
			else if (charlen == 1 && t_iseq(ptr, ','))
				state = LQPRS_WAITSNUM;
			else if (!t_isdigit(ptr))
				UNCHAR;
		}
		else if (state == LQPRS_WAITEND)
		{
			if (charlen == 1 && t_iseq(ptr, '.'))
			{
				state = LQPRS_WAITLEVEL;
				curqlevel = NEXTLEV(curqlevel);
			}
			else
				UNCHAR;
		}
		else
			/* internal error */
			elog(ERROR, "internal error in parser");

		ptr += charlen;
		if (state == LQPRS_WAITDELIM)
			lptr->wlen++;
		pos++;
	}

	if (state == LQPRS_WAITDELIM)
	{
		if (lptr->start == ptr)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("syntax error"),
					 errdetail("Unexpected end of line.")));

		lptr->len = ptr - lptr->start -
			((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
			((lptr->flag & LVAR_INCASE) ? 1 : 0) -
			((lptr->flag & LVAR_ANYEND) ? 1 : 0);
		if (lptr->len == 0)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("syntax error"),
					 errdetail("Unexpected end of line.")));

		if (lptr->wlen > 255)
			ereport(ERROR,
					(errcode(ERRCODE_NAME_TOO_LONG),
					 errmsg("name of level is too long"),
					 errdetail("Name length is %d, must "
							   "be < 256, in position %d.",
							   lptr->wlen, pos)));
	}
	else if (state == LQPRS_WAITOPEN)
		curqlevel->high = 0xffff;
	else if (state != LQPRS_WAITEND)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("syntax error"),
				 errdetail("Unexpected end of line.")));

	curqlevel = tmpql;
	totallen = LQUERY_HDRSIZE;
	while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
	{
		totallen += LQL_HDRSIZE;
		if (curqlevel->numvar)
		{
			lptr = GETVAR(curqlevel);
			while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
			{
				totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
				lptr++;
			}
		}
		else if (curqlevel->low > curqlevel->high)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("syntax error"),
					 errdetail("Low limit(%d) is greater than upper(%d).",
							   curqlevel->low, curqlevel->high)));

		curqlevel = NEXTLEV(curqlevel);
	}

	result = (lquery *) palloc0(totallen);
	SET_VARSIZE(result, totallen);
	result->numlevel = num;
	result->firstgood = 0;
	result->flag = 0;
	if (hasnot)
		result->flag |= LQUERY_HASNOT;
	cur = LQUERY_FIRST(result);
	curqlevel = tmpql;
	while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
	{
		memcpy(cur, curqlevel, LQL_HDRSIZE);
		cur->totallen = LQL_HDRSIZE;
		if (curqlevel->numvar)
		{
			lrptr = LQL_FIRST(cur);
			lptr = GETVAR(curqlevel);
			while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
			{
				cur->totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
				lrptr->len = lptr->len;
				lrptr->flag = lptr->flag;
				lrptr->val = ltree_crc32_sz(lptr->start, lptr->len);
				memcpy(lrptr->name, lptr->start, lptr->len);
				lptr++;
				lrptr = LVAR_NEXT(lrptr);
			}
			pfree(GETVAR(curqlevel));
			if (cur->numvar > 1 || cur->flag != 0)
				wasbad = true;
			else if (wasbad == false)
				(result->firstgood)++;
		}
		else
			wasbad = true;
		curqlevel = NEXTLEV(curqlevel);
		cur = LQL_NEXT(cur);
	}

	pfree(tmpql);
	PG_RETURN_POINTER(result);
}
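For orientation, a few inputs this parser accepts (taken from the ltree documentation), plus a sketch of invoking it directly from C; the wrapper function below is a hypothetical illustration, not part of the module.

/*
 * Illustrative lquery inputs:
 *
 *   Top.Science.Astronomy                    exact three-level path
 *   *.Astronomy.*                            any levels around "Astronomy"
 *   Top.*{0,2}.sport*@.!football|tennis      level quantifier, prefix
 *                                            match (*), case-insensitive
 *                                            (@), and NOT (!) variants
 *
 * Hypothetical direct invocation from C (sketch only):
 */
static lquery *
parse_example_lquery(void)
{
	return (lquery *) DatumGetPointer(
		DirectFunctionCall1(lquery_in,
							CStringGetDatum("Top.*{0,2}.sport*@.!football|tennis")));
}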
Example #5
/*
 * get_relation_info -
 *	  Retrieves catalog information for a given relation.
 *
 * Given the Oid of the relation, return the following info into fields
 * of the RelOptInfo struct:
 *
 *	min_attr	lowest valid AttrNumber
 *	max_attr	highest valid AttrNumber
 *	indexlist	list of IndexOptInfos for relation's indexes
 *	fdwroutine	if it's a foreign table, the FDW function pointers
 *	pages		number of pages
 *	tuples		number of tuples
 *
 * Also, initialize the attr_needed[] and attr_widths[] arrays.  In most
 * cases these are left as zeroes, but sometimes we need to compute attr
 * widths here, and we may as well cache the results for costsize.c.
 *
 * If inhparent is true, all we need to do is set up the attr arrays:
 * the RelOptInfo actually represents the appendrel formed by an inheritance
 * tree, and so the parent rel's physical size and index information isn't
 * important for it.
 */
void
get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
				  RelOptInfo *rel)
{
	Index		varno = rel->relid;
	Relation	relation;
	bool		hasindex;
	List	   *indexinfos = NIL;

	/*
	 * We need not lock the relation since it was already locked, either by
	 * the rewriter or when expand_inherited_rtentry() added it to the query's
	 * rangetable.
	 */
	relation = heap_open(relationObjectId, NoLock);

	/* Temporary and unlogged relations are inaccessible during recovery. */
	if (!RelationNeedsWAL(relation) && RecoveryInProgress())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary or unlogged relations during recovery")));

	rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1;
	rel->max_attr = RelationGetNumberOfAttributes(relation);
	rel->reltablespace = RelationGetForm(relation)->reltablespace;

	Assert(rel->max_attr >= rel->min_attr);
	rel->attr_needed = (Relids *)
		palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids));
	rel->attr_widths = (int32 *)
		palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32));

	/*
	 * Estimate relation size --- unless it's an inheritance parent, in which
	 * case the size will be computed later in set_append_rel_pathlist, and we
	 * must leave it zero for now to avoid bollixing the total_table_pages
	 * calculation.
	 */
	if (!inhparent)
		estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
						  &rel->pages, &rel->tuples, &rel->allvisfrac);

	/*
	 * Make list of indexes.  Ignore indexes on system catalogs if told to.
	 * Don't bother with indexes for an inheritance parent, either.
	 */
	if (inhparent ||
		(IgnoreSystemIndexes && IsSystemRelation(relation)))
		hasindex = false;
	else
		hasindex = relation->rd_rel->relhasindex;

	if (hasindex)
	{
		List	   *indexoidlist;
		ListCell   *l;
		LOCKMODE	lmode;

		indexoidlist = RelationGetIndexList(relation);

		/*
		 * For each index, we get the same type of lock that the executor will
		 * need, and do not release it.  This saves a couple of trips to the
		 * shared lock manager while not creating any real loss of
		 * concurrency, because no schema changes could be happening on the
		 * index while we hold lock on the parent rel, and neither lock type
		 * blocks any other kind of index operation.
		 */
		if (rel->relid == root->parse->resultRelation)
			lmode = RowExclusiveLock;
		else
			lmode = AccessShareLock;

		foreach(l, indexoidlist)
		{
			Oid			indexoid = lfirst_oid(l);
			Relation	indexRelation;
			Form_pg_index index;
			IndexOptInfo *info;
			int			ncolumns;
			int			i;

			/*
			 * Extract info from the relation descriptor for the index.
			 */
			indexRelation = index_open(indexoid, lmode);
			index = indexRelation->rd_index;

			/*
			 * Ignore invalid indexes, since they can't safely be used for
			 * queries.  Note that this is OK because the data structure we
			 * are constructing is only used by the planner --- the executor
			 * still needs to insert into "invalid" indexes, if they're marked
			 * IndexIsReady.
			 */
			if (!IndexIsValid(index))
			{
				index_close(indexRelation, NoLock);
				continue;
			}

			/*
			 * If the index is valid, but cannot yet be used, ignore it; but
			 * mark the plan we are generating as transient. See
			 * src/backend/access/heap/README.HOT for discussion.
			 */
			if (index->indcheckxmin &&
				!TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data),
									   TransactionXmin))
			{
				root->glob->transientPlan = true;
				index_close(indexRelation, NoLock);
				continue;
			}

			info = makeNode(IndexOptInfo);

			info->indexoid = index->indexrelid;
			info->reltablespace =
				RelationGetForm(indexRelation)->reltablespace;
			info->rel = rel;
			info->ncolumns = ncolumns = index->indnatts;
			info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
			info->indexcollations = (Oid *) palloc(sizeof(Oid) * ncolumns);
			info->opfamily = (Oid *) palloc(sizeof(Oid) * ncolumns);
			info->opcintype = (Oid *) palloc(sizeof(Oid) * ncolumns);

			for (i = 0; i < ncolumns; i++)
			{
				info->indexkeys[i] = index->indkey.values[i];
				info->indexcollations[i] = indexRelation->rd_indcollation[i];
				info->opfamily[i] = indexRelation->rd_opfamily[i];
				info->opcintype[i] = indexRelation->rd_opcintype[i];
			}

			info->relam = indexRelation->rd_rel->relam;
			info->amcostestimate = indexRelation->rd_am->amcostestimate;
			info->canreturn = index_can_return(indexRelation);
			info->amcanorderbyop = indexRelation->rd_am->amcanorderbyop;
			info->amoptionalkey = indexRelation->rd_am->amoptionalkey;
			info->amsearcharray = indexRelation->rd_am->amsearcharray;
			info->amsearchnulls = indexRelation->rd_am->amsearchnulls;
			info->amhasgettuple = OidIsValid(indexRelation->rd_am->amgettuple);
			info->amhasgetbitmap = OidIsValid(indexRelation->rd_am->amgetbitmap);

			/*
			 * Fetch the ordering information for the index, if any.
			 */
			if (info->relam == BTREE_AM_OID)
			{
				/*
				 * If it's a btree index, we can use its opfamily OIDs
				 * directly as the sort ordering opfamily OIDs.
				 */
				Assert(indexRelation->rd_am->amcanorder);

				info->sortopfamily = info->opfamily;
				info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns);
				info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns);

				for (i = 0; i < ncolumns; i++)
				{
					int16		opt = indexRelation->rd_indoption[i];

					info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
				}
			}
			else if (indexRelation->rd_am->amcanorder)
			{
				/*
				 * Otherwise, identify the corresponding btree opfamilies by
				 * trying to map this index's "<" operators into btree.  Since
				 * "<" uniquely defines the behavior of a sort order, this is
				 * a sufficient test.
				 *
				 * XXX This method is rather slow and also requires the
				 * undesirable assumption that the other index AM numbers its
				 * strategies the same as btree.  It'd be better to have a way
				 * to explicitly declare the corresponding btree opfamily for
				 * each opfamily of the other index type.  But given the lack
				 * of current or foreseeable amcanorder index types, it's not
				 * worth expending more effort on now.
				 */
				info->sortopfamily = (Oid *) palloc(sizeof(Oid) * ncolumns);
				info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns);
				info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns);

				for (i = 0; i < ncolumns; i++)
				{
					int16		opt = indexRelation->rd_indoption[i];
					Oid			ltopr;
					Oid			btopfamily;
					Oid			btopcintype;
					int16		btstrategy;

					info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;

					ltopr = get_opfamily_member(info->opfamily[i],
												info->opcintype[i],
												info->opcintype[i],
												BTLessStrategyNumber);
					if (OidIsValid(ltopr) &&
						get_ordering_op_properties(ltopr,
												   &btopfamily,
												   &btopcintype,
												   &btstrategy) &&
						btopcintype == info->opcintype[i] &&
						btstrategy == BTLessStrategyNumber)
					{
						/* Successful mapping */
						info->sortopfamily[i] = btopfamily;
					}
					else
					{
						/* Fail ... quietly treat index as unordered */
						info->sortopfamily = NULL;
						info->reverse_sort = NULL;
						info->nulls_first = NULL;
						break;
					}
				}
			}
			else
			{
				info->sortopfamily = NULL;
				info->reverse_sort = NULL;
				info->nulls_first = NULL;
			}

			/*
			 * Fetch the index expressions and predicate, if any.  We must
			 * modify the copies we obtain from the relcache to have the
			 * correct varno for the parent relation, so that they match up
			 * correctly against qual clauses.
			 */
			info->indexprs = RelationGetIndexExpressions(indexRelation);
			info->indpred = RelationGetIndexPredicate(indexRelation);
			if (info->indexprs && varno != 1)
				ChangeVarNodes((Node *) info->indexprs, 1, varno, 0);
			if (info->indpred && varno != 1)
				ChangeVarNodes((Node *) info->indpred, 1, varno, 0);

			/* Build targetlist using the completed indexprs data */
			info->indextlist = build_index_tlist(root, info, relation);

			info->predOK = false;		/* set later in indxpath.c */
			info->unique = index->indisunique;
			info->immediate = index->indimmediate;
			info->hypothetical = false;

			/*
			 * Estimate the index size.  If it's not a partial index, we lock
			 * the number-of-tuples estimate to equal the parent table; if it
			 * is partial then we have to use the same methods as we would for
			 * a table, except we can be sure that the index is not larger
			 * than the table.
			 */
			if (info->indpred == NIL)
			{
				info->pages = RelationGetNumberOfBlocks(indexRelation);
				info->tuples = rel->tuples;
			}
			else
			{
				double		allvisfrac; /* dummy */

				estimate_rel_size(indexRelation, NULL,
								  &info->pages, &info->tuples, &allvisfrac);
				if (info->tuples > rel->tuples)
					info->tuples = rel->tuples;
			}

			if (info->relam == BTREE_AM_OID)
			{
				/* For btrees, get tree height while we have the index open */
				info->tree_height = _bt_getrootheight(indexRelation);
			}
			else
			{
				/* For other index types, just set it to "unknown" for now */
				info->tree_height = -1;
			}

			index_close(indexRelation, NoLock);

			indexinfos = lcons(info, indexinfos);
		}

		list_free(indexoidlist);
	}

	rel->indexlist = indexinfos;

	/* Grab foreign-data wrapper routine, if relation is a foreign table */
	if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
		rel->fdwroutine = GetFdwRoutineForRelation(relation, true);
	else
		rel->fdwroutine = NULL;

	heap_close(relation, NoLock);

	/*
	 * Allow a plugin to editorialize on the info we obtained from the
	 * catalogs.  Actions might include altering the assumed relation size,
	 * removing an index, or adding a hypothetical index to the indexlist.
	 */
	if (get_relation_info_hook)
		(*get_relation_info_hook) (root, relationObjectId, inhparent, rel);
}
Example #6
/*
 * Construct an inner tuple containing the given prefix and node array
 */
SpGistInnerTuple
spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
				  int nNodes, SpGistNodeTuple *nodes)
{
	SpGistInnerTuple tup;
	unsigned int size;
	unsigned int prefixSize;
	int			i;
	char	   *ptr;

	/* Compute size needed */
	if (hasPrefix)
		prefixSize = SpGistGetTypeSize(&state->attPrefixType, prefix);
	else
		prefixSize = 0;

	size = SGITHDRSZ + prefixSize;

	/* Note: we rely on node tuple sizes to be maxaligned already */
	for (i = 0; i < nNodes; i++)
		size += IndexTupleSize(nodes[i]);

	/*
	 * Ensure that we can replace the tuple with a dead tuple later.  This
	 * test is unnecessary given current tuple layouts, but let's be safe.
	 */
	if (size < SGDTSIZE)
		size = SGDTSIZE;

	/*
	 * Inner tuple should be small enough to fit on a page
	 */
	if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu",
						(Size) size,
						SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
			errhint("Values larger than a buffer page cannot be indexed.")));

	/*
	 * Check for overflow of header fields --- probably can't fail if the
	 * above succeeded, but let's be paranoid
	 */
	if (size > SGITMAXSIZE ||
		prefixSize > SGITMAXPREFIXSIZE ||
		nNodes > SGITMAXNNODES)
		elog(ERROR, "SPGiST inner tuple header field is too small");

	/* OK, form the tuple */
	tup = (SpGistInnerTuple) palloc0(size);

	tup->nNodes = nNodes;
	tup->prefixSize = prefixSize;
	tup->size = size;

	if (hasPrefix)
		memcpyDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);

	ptr = (char *) SGITNODEPTR(tup);

	for (i = 0; i < nNodes; i++)
	{
		SpGistNodeTuple node = nodes[i];

		memcpy(ptr, node, IndexTupleSize(node));
		ptr += IndexTupleSize(node);
	}

	return tup;
}
Example #7
static Datum
gp_aovisimap_entry_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[4];
	bool		nulls[4];
	HeapTuple tuple;
	Datum result;

	typedef struct Context
	{
		AppendOnlyVisimap visiMap;

		Relation parentRelation;

		IndexScanDesc indexScan;

		text *bitmapBuffer;
	} Context;
	
	FuncCallContext *funcctx;
	Context *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;
		
		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(4, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "first_row_num",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "hidden_tupcount",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "bitmap",
						   TEXTOID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect all the locking information that we will format and send
		 * out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->parentRelation = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->parentRelation) || RelationIsAoCols(context->parentRelation)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		AppendOnlyVisimap_Init(&context->visiMap,
				context->parentRelation->rd_appendonly->visimaprelid,
				context->parentRelation->rd_appendonly->visimapidxid,
				AccessShareLock,
				SnapshotNow);

		context->indexScan = AppendOnlyVisimapStore_BeginScan(
			&context->visiMap.visimapStore, 0, NULL);

		context->bitmapBuffer = palloc0(VARHDRSZ + APPENDONLY_VISIMAP_MAX_RANGE + 1);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	if (AppendOnlyVisimapStore_GetNext(&context->visiMap.visimapStore,
				context->indexScan,
				ForwardScanDirection,
				&context->visiMap.visimapEntry,
				NULL))
	{
		AppendOnlyVisimapEntry *visimapEntry = &context->visiMap.visimapEntry;

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = Int32GetDatum(visimapEntry->segmentFileNum);
		values[1] = Int64GetDatum(visimapEntry->firstRowNum);
		values[2] = Int32GetDatum(
				(int32)AppendOnlyVisimapEntry_GetHiddenTupleCount(visimapEntry));
		
		gp_aovisimap_encode_bitmap(VARDATA(context->bitmapBuffer), 
				visimapEntry->bitmap);
		SET_VARSIZE(context->bitmapBuffer, APPENDONLY_VISIMAP_MAX_RANGE);
		values[3] = PointerGetDatum(context->bitmapBuffer);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	
	AppendOnlyVisimapStore_EndScan(&context->visiMap.visimapStore,
			context->indexScan);
	AppendOnlyVisimap_Finish(&context->visiMap, AccessShareLock);
	heap_close(context->parentRelation, AccessShareLock);

	pfree(context->bitmapBuffer);
	pfree(context);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
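This function and the next one both follow PostgreSQL's standard set-returning-function (SRF) protocol. Stripped to its skeleton it looks like the sketch below; my_srf and the per-row logic are placeholders, everything else is the standard funcapi.h machinery.

/* Skeleton of the SRF calling convention used above (sketch only). */
Datum
my_srf(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
		/* one-time setup: build tuple descriptor, allocate user_fctx ... */
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();

	if (/* another row is available */ false)
		SRF_RETURN_NEXT(funcctx, (Datum) 0);	/* would be a real datum */

	SRF_RETURN_DONE(funcctx);
}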
Example #8
static Datum
gp_aovisimap_hidden_info_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[3];
	bool		nulls[3];
	HeapTuple tuple;
	Datum result;

	typedef struct Context
	{
		AppendOnlyVisimap visiMap;

		Relation parentRelation;

		FileSegInfo **appendonlySegfileInfo;
		AOCSFileSegInfo **aocsSegfileInfo;
		int segfile_info_total;

		int i;
	} Context;
	
	FuncCallContext *funcctx;
	Context *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(3, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "hidden_tupcount",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "total_tupcount",
						   INT8OID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect all the locking information that we will format and send
		 * out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->parentRelation = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->parentRelation) || RelationIsAoCols(context->parentRelation)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		if (RelationIsAoRows(context->parentRelation))
		{
			context->appendonlySegfileInfo = GetAllFileSegInfo(
				context->parentRelation,
				SnapshotNow,
				&context->segfile_info_total);
		}
		else
		{
			Assert(RelationIsAoCols(context->parentRelation));
			context->aocsSegfileInfo = GetAllAOCSFileSegInfo(context->parentRelation,
					SnapshotNow, &context->segfile_info_total);
		}
		context->i = 0;

		AppendOnlyVisimap_Init(&context->visiMap,
				context->parentRelation->rd_appendonly->visimaprelid,
				context->parentRelation->rd_appendonly->visimapidxid,
				AccessShareLock,
				SnapshotNow);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	while (context->i < context->segfile_info_total)
	{
		int64 tupcount;
		int segno;
		if (context->appendonlySegfileInfo)
		{
			FileSegInfo *fsinfo = context->appendonlySegfileInfo[context->i];
			tupcount = fsinfo->total_tupcount;
			segno = fsinfo->segno;
		}
		else if (context->aocsSegfileInfo)
		{
			AOCSFileSegInfo *fsinfo = context->aocsSegfileInfo[context->i];
			tupcount = fsinfo->total_tupcount;
			segno = fsinfo->segno;
		}
		else
		{
			Insist(false);
		}

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = Int32GetDatum(segno);
		values[1] = Int64GetDatum(AppendOnlyVisimap_GetSegmentFileHiddenTupleCount(
					&context->visiMap, segno));
		values[2] = Int64GetDatum(tupcount);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		context->i++;
		SRF_RETURN_NEXT(funcctx, result);
	}
	
	AppendOnlyVisimap_Finish(&context->visiMap, AccessShareLock);
	if (context->appendonlySegfileInfo)
	{
		FreeAllSegFileInfo(context->appendonlySegfileInfo, context->segfile_info_total);
		pfree(context->appendonlySegfileInfo);
		context->appendonlySegfileInfo = NULL;
	}
	if (context->aocsSegfileInfo)
	{
		FreeAllAOCSSegFileInfo(context->aocsSegfileInfo, context->segfile_info_total);
		pfree(context->aocsSegfileInfo);
		context->aocsSegfileInfo = NULL;

	}
	heap_close(context->parentRelation, AccessShareLock);
	pfree(context);
	funcctx->user_fctx = NULL;

	SRF_RETURN_DONE(funcctx);
}
Example #9
Datum
ginbulkdelete(PG_FUNCTION_ARGS)
{
	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
	void	   *callback_state = (void *) PG_GETARG_POINTER(3);
	Relation	index = info->index;
	BlockNumber blkno = GIN_ROOT_BLKNO;
	GinVacuumState gvs;
	Buffer		buffer;
	BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
	uint32		nRoot;

	gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
									   "Gin vacuum temporary context",
									   ALLOCSET_DEFAULT_MINSIZE,
									   ALLOCSET_DEFAULT_INITSIZE,
									   ALLOCSET_DEFAULT_MAXSIZE);
	gvs.index = index;
	gvs.callback = callback;
	gvs.callback_state = callback_state;
	gvs.strategy = info->strategy;
	initGinState(&gvs.ginstate, index);

	/* first time through? */
	if (stats == NULL)
	{
		/* Yes, so initialize stats to zeroes */
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
		/* and cleanup any pending inserts */
		ginInsertCleanup(&gvs.ginstate, true, stats);
	}

	/* we'll re-count the tuples each time */
	stats->num_index_tuples = 0;
	gvs.result = stats;

	buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, info->strategy);

	/* find leaf page */
	for (;;)
	{
		Page		page = BufferGetPage(buffer);
		IndexTuple	itup;

		LockBuffer(buffer, GIN_SHARE);

		Assert(!GinPageIsData(page));

		if (GinPageIsLeaf(page))
		{
			LockBuffer(buffer, GIN_UNLOCK);
			LockBuffer(buffer, GIN_EXCLUSIVE);

			if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page))
			{
				LockBuffer(buffer, GIN_UNLOCK);
				continue;		/* check it once more */
			}
			break;
		}

		Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);

		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
		blkno = GinGetDownlink(itup);
		Assert(blkno != InvalidBlockNumber);

		UnlockReleaseBuffer(buffer);
		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, info->strategy);
	}

	/* we have now found the leftmost leaf page in the entry B-tree */

	for (;;)
	{
		Page		page = BufferGetPage(buffer);
		Page		resPage;
		uint32		i;

		Assert(!GinPageIsData(page));

		resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot);

		blkno = GinPageGetOpaque(page)->rightlink;

		if (resPage)
		{
			START_CRIT_SECTION();
			PageRestoreTempPage(resPage, page);
			MarkBufferDirty(buffer);
			xlogVacuumPage(gvs.index, buffer);
			UnlockReleaseBuffer(buffer);
			END_CRIT_SECTION();
		}
		else
		{
			UnlockReleaseBuffer(buffer);
		}

		vacuum_delay_point();

		for (i = 0; i < nRoot; i++)
		{
			ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
			vacuum_delay_point();
		}

		if (blkno == InvalidBlockNumber)		/* rightmost page */
			break;

		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, info->strategy);
		LockBuffer(buffer, GIN_EXCLUSIVE);
	}

	MemoryContextDelete(gvs.tmpCxt);

	PG_RETURN_POINTER(gvs.result);
}
Example #10
/*
 * scans posting tree and deletes empty pages
 */
static bool
ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
				DataPageDeleteStack *parent, OffsetNumber myoff)
{
	DataPageDeleteStack *me;
	Buffer		buffer;
	Page		page;
	bool		meDelete = FALSE;
	bool		isempty;

	if (isRoot)
	{
		me = parent;
	}
	else
	{
		if (!parent->child)
		{
			me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack));
			me->parent = parent;
			parent->child = me;
			me->leftBlkno = InvalidBlockNumber;
		}
		else
			me = parent->child;
	}

	buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, gvs->strategy);
	page = BufferGetPage(buffer);

	Assert(GinPageIsData(page));

	if (!GinPageIsLeaf(page))
	{
		OffsetNumber i;

		me->blkno = blkno;
		for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
		{
			PostingItem *pitem = GinDataPageGetPostingItem(page, i);

			if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i))
				i--;
		}
	}

	if (GinPageIsLeaf(page))
		isempty = GinDataLeafPageIsEmpty(page);
	else
		isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber;

	if (isempty)
	{
		/* we never delete the left- or rightmost branch */
		if (me->leftBlkno != InvalidBlockNumber && !GinPageRightMost(page))
		{
			Assert(!isRoot);
			ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot);
			meDelete = TRUE;
		}
	}

	ReleaseBuffer(buffer);

	if (!meDelete)
		me->leftBlkno = blkno;

	return meDelete;
}
Example #11
/*
 * Actually do a base backup for the specified tablespaces.
 *
 * This is split out mainly to avoid complaints about "variable might be
 * clobbered by longjmp" from stupider versions of gcc.
 */
static void
perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
{
	XLogRecPtr	startptr;
	TimeLineID	starttli;
	XLogRecPtr	endptr;
	TimeLineID	endtli;
	char	   *labelfile;
	int			datadirpathlen;

	datadirpathlen = strlen(DataDir);

	backup_started_in_recovery = RecoveryInProgress();

	startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
								  &labelfile);
	SendXlogRecPtrResult(startptr, starttli);

	/*
	 * Calculate the relative path of temporary statistics directory
	 * in order to skip the files which are located in that directory later.
	 */
	if (is_absolute_path(pgstat_stat_directory) &&
		strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
		statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
	else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
		statrelpath = psprintf("./%s", pgstat_stat_directory);
	else
		statrelpath = pgstat_stat_directory;

	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
	{
		List	   *tablespaces = NIL;
		ListCell   *lc;
		struct dirent *de;
		tablespaceinfo *ti;

		/* Collect information about all tablespaces */
		while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
		{
			char		fullpath[MAXPGPATH];
			char		linkpath[MAXPGPATH];
			char	   *relpath = NULL;
			int			rllen;

			/* Skip special stuff */
			if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
				continue;

			snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);

#if defined(HAVE_READLINK) || defined(WIN32)
			rllen = readlink(fullpath, linkpath, sizeof(linkpath));
			if (rllen < 0)
			{
				ereport(WARNING,
						(errmsg("could not read symbolic link \"%s\": %m",
								fullpath)));
				continue;
			}
			else if (rllen >= sizeof(linkpath))
			{
				ereport(WARNING,
						(errmsg("symbolic link \"%s\" target is too long",
								fullpath)));
				continue;
			}
			linkpath[rllen] = '\0';

			/*
			 * Relpath holds the relative path of the tablespace directory
			 * when it's located within PGDATA, or NULL if it's located
			 * elsewhere.
			 */
			if (rllen > datadirpathlen &&
				strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
				IS_DIR_SEP(linkpath[datadirpathlen]))
				relpath = linkpath + datadirpathlen + 1;

			ti = palloc(sizeof(tablespaceinfo));
			ti->oid = pstrdup(de->d_name);
			ti->path = pstrdup(linkpath);
			ti->rpath = relpath ? pstrdup(relpath) : NULL;
			ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
			tablespaces = lappend(tablespaces, ti);
#else

			/*
			 * If the platform does not have symbolic links, it should not be
			 * possible to have tablespaces - clearly somebody else created
			 * them. Warn about it and ignore.
			 */
			ereport(WARNING,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				  errmsg("tablespaces are not supported on this platform")));
#endif
		}

		/* Add a node for the base directory at the end */
		ti = palloc0(sizeof(tablespaceinfo));
		ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
		tablespaces = lappend(tablespaces, ti);

		/* Send tablespace header */
		SendBackupHeader(tablespaces);

		/* Send off our tablespaces one by one */
		foreach(lc, tablespaces)
		{
			tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
			StringInfoData buf;

			/* Send CopyOutResponse message */
			pq_beginmessage(&buf, 'H');
			pq_sendbyte(&buf, 0);		/* overall format */
			pq_sendint(&buf, 0, 2);		/* natts */
			pq_endmessage(&buf);

			if (ti->path == NULL)
			{
				struct stat statbuf;

				/* In the main tar, include the backup_label first... */
				sendFileWithContent(BACKUP_LABEL_FILE, labelfile);

				/* ... then the bulk of the files ... */
				sendDir(".", 1, false, tablespaces);

				/* ... and pg_control after everything else. */
				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat control file \"%s\": %m",
									XLOG_CONTROL_FILE)));
				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false);
			}
			else
				sendTablespace(ti->path, false);

			/*
			 * If we're including WAL, and this is the main data directory we
			 * don't terminate the tar stream here. Instead, we will append
			 * the xlog files below and terminate it then. This is safe since
			 * the main data directory is always sent *last*.
			 */
			if (opt->includewal && ti->path == NULL)
			{
				Assert(lnext(lc) == NULL);
			}
			else
				pq_putemptymessage('c');		/* CopyDone */
		}
	}
Example #12
/* ----------------------------------------------------------------
 *		ExecHashTableCreate
 *
 *		create an empty hashtable data structure for hashjoin.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
{
	HashJoinTable hashtable;
	Plan	   *outerNode;
	int			nbuckets;
	int			nbatch;
	int			num_skew_mcvs;
	int			log2_nbuckets;
	int			nkeys;
	int			i;
	ListCell   *ho;
	MemoryContext oldcxt;

	/*
	 * Get information about the size of the relation to be hashed (it's the
	 * "outer" subtree of this node, but the inner relation of the hashjoin).
	 * Compute the appropriate size of the hash table.
	 */
	outerNode = outerPlan(node);

	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
							OidIsValid(node->skewTable),
							&nbuckets, &nbatch, &num_skew_mcvs);

#ifdef HJDEBUG
	printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif

	/* nbuckets must be a power of 2 */
	log2_nbuckets = my_log2(nbuckets);
	Assert(nbuckets == (1 << log2_nbuckets));

	/*
	 * Initialize the hash table control block.
	 *
	 * The hashtable control block is just palloc'd from the executor's
	 * per-query memory context.
	 */
	hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
	hashtable->nbuckets = nbuckets;
	hashtable->log2_nbuckets = log2_nbuckets;
	hashtable->buckets = NULL;
	hashtable->keepNulls = keepNulls;
	hashtable->skewEnabled = false;
	hashtable->skewBucket = NULL;
	hashtable->skewBucketLen = 0;
	hashtable->nSkewBuckets = 0;
	hashtable->skewBucketNums = NULL;
	hashtable->nbatch = nbatch;
	hashtable->curbatch = 0;
	hashtable->nbatch_original = nbatch;
	hashtable->nbatch_outstart = nbatch;
	hashtable->growEnabled = true;
	hashtable->totalTuples = 0;
	hashtable->innerBatchFile = NULL;
	hashtable->outerBatchFile = NULL;
	hashtable->spaceUsed = 0;
	hashtable->spacePeak = 0;
	hashtable->spaceAllowed = work_mem * 1024L;
	hashtable->spaceUsedSkew = 0;
	hashtable->spaceAllowedSkew =
		hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;

	/* cs3223: allocate memory for the bitvector filter */
	hashtable->bitvector = (int *) palloc0(bitvector_size * 1024);	/* sizeof(int)/32 = 1/8 */
	hashtable->numBVfilter = 0;
	hashtable->numProbNotJoin = 0;
	hashtable->firstCheck = 0;

	/*
	 * Get info about the hash functions to be used for each hash key. Also
	 * remember whether the join operators are strict.
	 */
	nkeys = list_length(hashOperators);
	hashtable->outer_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->inner_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
	i = 0;
	foreach(ho, hashOperators)
	{
		Oid			hashop = lfirst_oid(ho);
		Oid			left_hashfn;
		Oid			right_hashfn;

		if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
			elog(ERROR, "could not find hash function for hash operator %u",
				 hashop);
		fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
		fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
		hashtable->hashStrict[i] = op_strict(hashop);
		i++;
	}
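The cs3223 additions above allocate a bit array used as a simple membership filter for the probe side. Helpers along these lines would set and probe it; they are an assumption for illustration and are not present in the original file.

/* Hypothetical helpers for the cs3223 bitvector: treat it as a flat bit
 * array and index it by hash value modulo the number of bits. */
#define BV_WORD_BITS	(sizeof(int) * 8)

static inline void
bitvector_set(int *bv, uint32 hashvalue, uint32 nbits)
{
	uint32		bit = hashvalue % nbits;

	bv[bit / BV_WORD_BITS] |= 1 << (bit % BV_WORD_BITS);
}

static inline bool
bitvector_probe(const int *bv, uint32 hashvalue, uint32 nbits)
{
	uint32		bit = hashvalue % nbits;

	return (bv[bit / BV_WORD_BITS] & (1 << (bit % BV_WORD_BITS))) != 0;
}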
Example #13
/*
 * setup_regexp_matches --- do the initial matching for regexp_matches()
 *		or regexp_split()
 *
 * To avoid having to re-find the compiled pattern on each call, we do
 * all the matching in one swoop.  The returned regexp_matches_ctx contains
 * the locations of all the substrings matching the pattern.
 *
 * The four bool parameters have only two patterns (one for matching, one for
 * splitting) but it seems clearer to distinguish the functionality this way
 * than to key it all off one "is_split" flag. We don't currently assume that
 * fetching_unmatched is exclusive of fetching the matched text too; if it's
 * set, the conversion buffer is large enough to fetch any single matched or
 * unmatched string, but not any larger substring. (In practice, when splitting
 * the matches are usually small anyway, and it didn't seem worth complicating
 * the code further.)
 */
static regexp_matches_ctx *
setup_regexp_matches(text *orig_str, text *pattern, text *flags,
					 Oid collation,
					 bool force_glob,
					 bool use_subpatterns,
					 bool ignore_degenerate,
					 bool fetching_unmatched)
{
	regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
	int			eml = pg_database_encoding_max_length();
	int			orig_len;
	pg_wchar   *wide_str;
	int			wide_len;
	pg_re_flags re_flags;
	regex_t    *cpattern;
	regmatch_t *pmatch;
	int			pmatch_len;
	int			array_len;
	int			array_idx;
	int			prev_match_end;
	int			prev_valid_match_end;
	int			start_search;
	int			maxlen = 0;		/* largest fetch length in characters */

	/* save original string --- we'll extract result substrings from it */
	matchctx->orig_str = orig_str;

	/* convert string to pg_wchar form for matching */
	orig_len = VARSIZE_ANY_EXHDR(orig_str);
	wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
	wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);

	/* determine options */
	parse_re_flags(&re_flags, flags);
	if (force_glob)
	{
		/* user mustn't specify 'g' for regexp_split */
		if (re_flags.glob)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("regexp_split does not support the global option")));
		/* but we find all the matches anyway */
		re_flags.glob = true;
	}

	/* set up the compiled pattern */
	cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);

	/* do we want to remember subpatterns? */
	if (use_subpatterns && cpattern->re_nsub > 0)
	{
		matchctx->npatterns = cpattern->re_nsub;
		pmatch_len = cpattern->re_nsub + 1;
	}
	else
	{
		use_subpatterns = false;
		matchctx->npatterns = 1;
		pmatch_len = 1;
	}

	/* temporary output space for RE package */
	pmatch = palloc(sizeof(regmatch_t) * pmatch_len);

	/*
	 * the real output space (grown dynamically if needed)
	 *
	 * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
	 * than at 2^27
	 */
	array_len = re_flags.glob ? 255 : 31;
	matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
	array_idx = 0;

	/* search for the pattern, perhaps repeatedly */
	prev_match_end = 0;
	prev_valid_match_end = 0;
	start_search = 0;
	while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
							pmatch_len, pmatch))
	{
		/*
		 * If requested, ignore degenerate matches, which are zero-length
		 * matches occurring at the start or end of a string or just after a
		 * previous match.
		 */
		if (!ignore_degenerate ||
			(pmatch[0].rm_so < wide_len &&
			 pmatch[0].rm_eo > prev_match_end))
		{
			/* enlarge output space if needed */
			while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
			{
				array_len += array_len + 1;		/* 2^n-1 => 2^(n+1)-1 */
				if (array_len > MaxAllocSize/sizeof(int))
					ereport(ERROR,
							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
							 errmsg("too many regular expression matches")));
				matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
													sizeof(int) * array_len);
			}

			/* save this match's locations */
			if (use_subpatterns)
			{
				int			i;

				for (i = 1; i <= matchctx->npatterns; i++)
				{
					int		so = pmatch[i].rm_so;
					int		eo = pmatch[i].rm_eo;
					matchctx->match_locs[array_idx++] = so;
					matchctx->match_locs[array_idx++] = eo;
					if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
						maxlen = (eo - so);
				}
			}
			else
			{
				int		so = pmatch[0].rm_so;
				int		eo = pmatch[0].rm_eo;
				matchctx->match_locs[array_idx++] = so;
				matchctx->match_locs[array_idx++] = eo;
				if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
					maxlen = (eo - so);
			}
			matchctx->nmatches++;

			/*
			 * check length of unmatched portion between end of previous valid
			 * (nondegenerate, or degenerate but not ignored) match and start
			 * of current one
			 */
			if (fetching_unmatched &&
				pmatch[0].rm_so >= 0 &&
				(pmatch[0].rm_so - prev_valid_match_end) > maxlen)
				maxlen = (pmatch[0].rm_so - prev_valid_match_end);
			prev_valid_match_end = pmatch[0].rm_eo;
		}
		prev_match_end = pmatch[0].rm_eo;

		/* if not glob, stop after one match */
		if (!re_flags.glob)
			break;

		/*
		 * Advance search position.  Normally we start the next search at the
		 * end of the previous match; but if the match was of zero length, we
		 * have to advance by one character, or we'd just find the same match
		 * again.
		 */
		start_search = prev_match_end;
		if (pmatch[0].rm_so == pmatch[0].rm_eo)
			start_search++;
		if (start_search > wide_len)
			break;
	}

	/*
	 * check length of unmatched portion between end of last match and end of
	 * input string
	 */
	if (fetching_unmatched &&
		(wide_len - prev_valid_match_end) > maxlen)
		maxlen = (wide_len - prev_valid_match_end);

	/*
	 * Keep a note of the end position of the string for the benefit of
	 * splitting code.
	 */
	matchctx->match_locs[array_idx] = wide_len;

	if (eml > 1)
	{
		int64		maxsiz = eml * (int64) maxlen;
		int			conv_bufsiz;

		/*
		 * Make the conversion buffer large enough for any substring of
		 * interest.
		 *
		 * Worst case: assume we need the maximum size (maxlen*eml), but take
		 * advantage of the fact that the original string length in bytes is an
		 * upper bound on the byte length of any fetched substring (and we know
		 * that len+1 is safe to allocate because the varlena header is longer
		 * than 1 byte).
		 */
		if (maxsiz > orig_len)
			conv_bufsiz = orig_len + 1;
		else
			conv_bufsiz = maxsiz + 1;	/* safe since maxsiz < 2^30 */

		matchctx->conv_buf = palloc(conv_bufsiz);
		matchctx->conv_bufsiz = conv_bufsiz;
		matchctx->wide_str = wide_str;
	}
	else
	{
		/* No need to keep the wide string if we're in a single-byte charset. */
		pfree(wide_str);
		matchctx->wide_str = NULL;
		matchctx->conv_buf = NULL;
		matchctx->conv_bufsiz = 0;
	}

	/* Clean up temp storage */
	pfree(pmatch);

	return matchctx;
}
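A sketch of driving this helper directly; the function name, C_COLLATION_OID, and the particular flag combination are illustrative assumptions roughly matching a regexp_matches(..., 'g')-style call.

/* Hypothetical driver (sketch): collect every match of "a+" in "banana". */
static void
regexp_matches_example(void)
{
	text	   *str = cstring_to_text("banana");
	text	   *pat = cstring_to_text("a+");
	regexp_matches_ctx *ctx;

	ctx = setup_regexp_matches(str, pat, NULL /* no flags */ , C_COLLATION_OID,
							   true,	/* force_glob: find all matches */
							   true,	/* use_subpatterns */
							   true,	/* ignore_degenerate */
							   false);	/* fetching_unmatched */

	/* "a+" matches three times here, so ctx->nmatches == 3 */
	elog(DEBUG1, "found %d matches", ctx->nmatches);
}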
Example #14
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 *
 *		The return value indicates whether this function has held off
 *		interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
 */
bool
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	bool		heldoff = false;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	if (Gp_role == GP_ROLE_DISPATCH)
		elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeSegmentFileDropPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeCleanupPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to the correct value
	 * for an appendonly (AO/CO) table.
	 */
	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return false;
	}

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* heap relation */

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats, vacstmt->extra_oids);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 *
	 * Note that after we've truncated the heap, it's too late to abort the
	 * transaction; doing so would lose the sinval messages needed to tell
	 * the other backends about the table being shrunk.  We prevent interrupts
	 * in that case; caller is responsible for re-enabling them after
	 * committing the transaction.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
	{
		HOLD_INTERRUPTS();
		heldoff = true;
		lazy_truncate_heap(onerel, vacrelstats);
	}

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	if (vacrelstats->tot_free_pages > MaxFSMPages)
		ereport(WARNING,
				(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel)),
				 /* Only suggest VACUUM FULL if > 20% free */
				 (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
				 errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
				 errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));

	/* Update statistics in pg_class */
	vac_update_relstats_from_list(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze, vacrelstats->rel_tuples);

	if (gp_indexcheck_vacuum == INDEX_CHECK_ALL ||
		(gp_indexcheck_vacuum == INDEX_CHECK_SYSTEM &&
		 PG_CATALOG_NAMESPACE == RelationGetNamespace(onerel)))
	{
		int			i;

		for (i = 0; i < nindexes; i++)
		{
			if (Irel[i]->rd_rel->relam == BTREE_AM_OID)
				_bt_validate_vacuum(Irel[i], onerel, OldestXmin);
		}
	}

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
						  vacrelstats->pages_removed, vacrelstats->rel_pages,
						vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}

	return heldoff;
}
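
The truncation gate above compresses to one predicate: truncate only when enough tail pages could be given back to the OS. A standalone sketch using the thresholds from the PostgreSQL sources (REL_TRUNCATE_MINIMUM is 1000 pages and REL_TRUNCATE_FRACTION is 16 there; treat the exact values as assumptions for the build at hand):

#include <stdbool.h>
#include <stdint.h>

#define REL_TRUNCATE_MINIMUM   1000
#define REL_TRUNCATE_FRACTION  16

/* Truncate only when a goodly number of tail pages would be released. */
static bool
worth_truncating(uint32_t rel_pages, uint32_t nonempty_pages)
{
    uint32_t possibly_freeable = rel_pages - nonempty_pages;

    return possibly_freeable > 0 &&
           (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
            possibly_freeable >= rel_pages / REL_TRUNCATE_FRACTION);
}
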
Example #15
/*
 * Initializes the dictionary for use in backends - checks whether the
 * dictionary and stopword list are already loaded, and if not, parses
 * them and loads them into the shared segment.
 *
 * This is called through dispell_init(), which is responsible for proper
 * locking of the shared memory (using SegmentInfo->lock).
 */
static
void init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) {

    int size;

    SharedIspellDict * shdict = NULL;
    SharedStopList * shstop   = NULL;

    IspellDict * dict;
    StopList    stoplist;
    
    /* DICTIONARY + AFFIXES */
    
    /* TODO This should probably check that the filenames are not NULL, and
     * maybe that the files exist. Or maybe that's handled by the NIImport*
     * functions. */

    /* lookup if the dictionary (words and affixes) is already loaded in the shared segment */
    shdict = get_shared_dict(dictFile, affFile);

    /* load the dictionary / affixes if not yet defined */
    if (shdict == NULL) {

        dict = (IspellDict *)palloc0(sizeof(IspellDict));

        NIStartBuild(dict);

        NIImportDictionary(dict,
                           get_tsearch_config_filename(dictFile, "dict"));

        NIImportAffixes(dict,
                        get_tsearch_config_filename(affFile, "affix"));

        NISortDictionary(dict);
        NISortAffixes(dict);

        NIFinishBuild(dict);

        /* check available space in shared segment */
        size = sizeIspellDict(dict, dictFile, affFile);
        if (size > segment_info->available)
            elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %ld B available",
                 dictFile, affFile, size, segment_info->available);

        /* fine, there's enough space - copy the dictionary */
        shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell);

        elog(INFO, "shared dictionary %s.dict / %s.affix loaded, used %d B, %ld B remaining",
             dictFile, affFile, size, segment_info->available);

        /* add the new dictionary to the linked list (of SharedIspellDict structures) */
        shdict->next = segment_info->dict;
        segment_info->dict = shdict;

    }
    
    /* STOP WORDS */

    /* lookup if the stop words are already loaded in the shared segment, but only if there
     * actually is a list */
    if (stopFile != NULL) {

        shstop = get_shared_stop_list(stopFile);

        /* load the stopwords if not yet defined */
        if (shstop == NULL) {

            readstoplist(stopFile, &stoplist, lowerstr);

            size = sizeStopList(&stoplist, stopFile);
            if (size > segment_info->available) {
                elog(ERROR, "shared stoplist %s.stop needs %d B, only %ld B available",
                     stopFile, size, segment_info->available);
            }

            /* fine, there's enough space - copy the stoplist */
            shstop = copyStopList(&stoplist, stopFile, size);

            elog(INFO, "shared stoplist %s.stop loaded, used %d B, %ld B remaining",
                 stopFile, size, segment_info->available);

            /* add the new stopword list to the linked list (of SharedStopList structures) */
            shstop->next = segment_info->stop;
            segment_info->stop = shstop;

        }
    }

    /* Now, fill the DictInfo structure for the backend (references to dictionary,
     * stopwords and the filenames). */

    info->dict = shdict;
    info->stop = shstop;
    info->lookup = GetCurrentTimestamp();

    memcpy(info->dictFile, dictFile, strlen(dictFile) + 1);
    memcpy(info->affixFile, affFile, strlen(affFile) + 1);
    if (stopFile != NULL)
        memcpy(info->stopFile, stopFile, strlen(stopFile) + 1);
    else
        info->stopFile[0] = '\0';

}
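
These fixed-size filename fields invite wrong-source and overflow mistakes, so a small helper can centralize the copy. A hedged sketch; MAX_NAME_LEN and copy_filename are illustrative and not part of the shared_ispell API:

#include <stdio.h>
#include <string.h>

#define MAX_NAME_LEN 64     /* illustrative field size */

/* Copy a NUL-terminated filename into a fixed-size field, refusing to
 * overflow it; NULL means "no file" and yields an empty string. */
static int
copy_filename(char dst[MAX_NAME_LEN], const char *src)
{
    size_t len = src ? strlen(src) : 0;

    if (len >= MAX_NAME_LEN)
        return -1;                      /* caller should report an error */
    if (src)
        memcpy(dst, src, len + 1);      /* include the terminating NUL */
    else
        dst[0] = '\0';
    return 0;
}
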
Example #16
/*
 * lazy_vacuum_aorel -- perform LAZY VACUUM for one Append-only relation.
 */
static void
lazy_vacuum_aorel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats)
{
	LVRelStats *vacrelstats;
	bool		update_relstats = true;

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	if (vacuumStatement_IsInAppendOnlyPreparePhase(vacstmt))
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum prepare phase %s", RelationGetRelationName(onerel));

		vacuum_appendonly_indexes(onerel, vacstmt, updated_stats);
		if (RelationIsAoRows(onerel))
			AppendOnlyTruncateToEOF(onerel);
		else
			AOCSTruncateToEOF(onerel);

		/*
		 * MPP-23647.  For empty tables, we skip the compaction and
		 * cleanup phases.  Therefore, we update the stats (specifically,
		 * relfrozenxid) in the prepare phase if the table is empty.
		 * Otherwise, the stats are updated in the cleanup phase, once
		 * the correct values have been computed.
		 */
		if (vacstmt->appendonly_relation_empty)
		{
			update_relstats = true;
			/*
			 * For an empty relation, the only stats we care about
			 * are relfrozenxid and relhasindex.  We need to be
			 * mindful of correctly setting relhasindex here.
			 * relfrozenxid is already taken care of above by
			 * calling vacuum_set_xid_limits().
			 */
			vacrelstats->hasindex = onerel->rd_rel->relhasindex;
		}
		else
		{
			/*
			 * For a non-empty relation, follow the usual
			 * compaction phases and do not update stats in
			 * prepare phase.
			 */
			update_relstats = false;
		}
	}
	else if (!vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
		vacuum_appendonly_rel(onerel, vacstmt);
		update_relstats = false;
	}
	else
	{
		elogif(Debug_appendonly_print_compaction, LOG,
			   "Vacuum cleanup phase %s", RelationGetRelationName(onerel));

		vacuum_appendonly_fill_stats(onerel, ActiveSnapshot,
									 &vacrelstats->rel_pages,
									 &vacrelstats->rel_tuples,
									 &vacrelstats->hasindex);
		/* reset the remaining LVRelStats values */
		vacrelstats->nonempty_pages = 0;
		vacrelstats->num_dead_tuples = 0;
		vacrelstats->max_dead_tuples = 0;
		vacrelstats->tuples_deleted = 0;
		vacrelstats->tot_free_pages = 0;
		vacrelstats->fs_is_heap = false;
		vacrelstats->num_free_pages = 0;
		vacrelstats->max_free_pages = 0;
		vacrelstats->pages_removed = 0;
	}

	if (update_relstats)
	{
		/* Update statistics in pg_class */
		vac_update_relstats(onerel,
							vacrelstats->rel_pages,
							vacrelstats->rel_tuples,
							vacrelstats->hasindex,
							FreezeLimit,
							updated_stats);

		/* report results to the stats collector, too */
		pgstat_report_vacuum(RelationGetRelid(onerel),
							 onerel->rd_rel->relisshared,
							 true /*vacrelstats->scanned_all*/,
							 vacstmt->analyze, vacrelstats->rel_tuples);
	}
}
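
The phase dispatch above amounts to a small decision table: the prepare phase updates pg_class stats only for empty relations, the compaction phase never does, and the cleanup phase always does. A sketch with an illustrative enum (GPDB derives the phase from the VacuumStmt rather than passing it explicitly):

#include <stdbool.h>

typedef enum { AO_PHASE_PREPARE, AO_PHASE_COMPACT, AO_PHASE_CLEANUP } AoPhase;

/* Should this append-only vacuum phase write stats to pg_class? */
static bool
ao_phase_updates_relstats(AoPhase phase, bool relation_empty)
{
    switch (phase)
    {
        case AO_PHASE_PREPARE:
            return relation_empty;  /* empty rels skip the later phases */
        case AO_PHASE_COMPACT:
            return false;           /* stats not final yet */
        case AO_PHASE_CLEANUP:
            return true;            /* correct stats computed here */
    }
    return false;
}
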
Example #17
/*
 *	lazy_scan_heap() -- scan an open heap relation
 *
 *		This routine sets commit status bits, builds lists of dead tuples
 *		and pages with free space, and calculates statistics on the number
 *		of live tuples in the heap.  When done, or when we run low on space
 *		for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 *		If there are no indexes then we just vacuum each dirty page as we
 *		process it, since there's no point in gathering many tuples.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes, List *updated_stats, List *all_extra_oids)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	BlockNumber nblocks,
				blkno;
	HeapTupleData tuple;
	char	   *relname;
	BlockNumber empty_pages,
				vacuumed_pages;
	double		num_tuples,
				tups_vacuumed,
				nkeep,
				nunused;
	IndexBulkDeleteResult **indstats;
	int			i;
	int reindex_count = 1;
	PGRUsage	ru0;

	/* Fetch gp_persistent_relation_node information that will be added to XLOG record. */
	RelationFetchGpRelationNodeForXLog(onerel);

	pg_rusage_init(&ru0);

	relname = RelationGetRelationName(onerel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(onerel)),
					relname)));

	empty_pages = vacuumed_pages = 0;
	num_tuples = tups_vacuumed = nkeep = nunused = 0;

	indstats = (IndexBulkDeleteResult **)
		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

	nblocks = RelationGetNumberOfBlocks(onerel);
	vacrelstats->rel_pages = nblocks;
	vacrelstats->nonempty_pages = 0;

	lazy_space_alloc(vacrelstats, nblocks);

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		tupgone,
					hastup;
		int			prev_dead_count;
		OffsetNumber frozen[MaxOffsetNumber];
		int			nfrozen;

		vacuum_delay_point();

		/*
		 * If we are close to overrunning the available space for dead-tuple
		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
		 */
		if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove index entries */
			for (i = 0; i < nindexes; i++)
			{
				List *extra_oids = get_oids_for_bitmap(all_extra_oids, Irel[i],
													   onerel, reindex_count);

				lazy_vacuum_index(Irel[i],
								  &indstats[i],
								  vacrelstats,
								  extra_oids);
				list_free(extra_oids);
			}
			reindex_count++;

			/* Remove tuples from heap */
			lazy_vacuum_heap(onerel, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
		}

		/* -------- MirroredLock ---------- */
		MIRROREDLOCK_BUFMGR_LOCK;

		buf = ReadBuffer(onerel, blkno);

		/* Initially, we only need shared access to the buffer */
		LockBuffer(buf, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buf);

		if (PageIsNew(page))
		{
			/*
			 * An all-zeroes page could be left over if a backend extends the
			 * relation but crashes before initializing the page. Reclaim such
			 * pages for use.
			 *
			 * We have to be careful here because we could be looking at a
			 * page that someone has just added to the relation and not yet
			 * been able to initialize (see RelationGetBufferForTuple). To
			 * protect against that, release the buffer lock, grab the
			 * relation extension lock momentarily, and re-lock the buffer. If
			 * the page is still uninitialized by then, it must be left over
			 * from a crashed backend, and we can initialize it.
			 *
			 * We don't really need the relation lock when this is a new or
			 * temp relation, but it's probably not worth the code space to
			 * check that, since this surely isn't a critical path.
			 *
			 * Note: the comparable code in vacuum.c need not worry because
			 * it's got exclusive lock on the whole relation.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			MIRROREDLOCK_BUFMGR_UNLOCK;
			/* -------- MirroredLock ---------- */

			LockRelationForExtension(onerel, ExclusiveLock);
			UnlockRelationForExtension(onerel, ExclusiveLock);

			/* -------- MirroredLock ---------- */
			MIRROREDLOCK_BUFMGR_LOCK;

			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);  /* LockBufferForCleanup(buf)? */
			if (PageIsNew(page))
			{
				ereport(WARNING,
				(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
						relname, blkno)));
				PageInit(page, BufferGetPageSize(buf), 0);

				/* must record in xlog so that changetracking will know about this change */
				log_heap_newpage(onerel, page, blkno);

				empty_pages++;
				lazy_record_free_space(vacrelstats, blkno,
									   PageGetFreeSpace(page));
			}
			MarkBufferDirty(buf);
			UnlockReleaseBuffer(buf);

			MIRROREDLOCK_BUFMGR_UNLOCK;
			/* -------- MirroredLock ---------- */

			continue;
		}

		if (PageIsEmpty(page))
		{
			empty_pages++;
			lazy_record_free_space(vacrelstats, blkno,
								   PageGetFreeSpace(page));
			UnlockReleaseBuffer(buf);

			MIRROREDLOCK_BUFMGR_UNLOCK;
			/* -------- MirroredLock ---------- */

			continue;
		}

		nfrozen = 0;
		hastup = false;
		prev_dead_count = vacrelstats->num_dead_tuples;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			if (!ItemIdIsUsed(itemid))
			{
				nunused += 1;
				continue;
			}

			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);
			ItemPointerSet(&(tuple.t_self), blkno, offnum);

			tupgone = false;

			switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf, false))
			{
				case HEAPTUPLE_DEAD:
					tupgone = true;		/* we can delete the tuple */
					break;
				case HEAPTUPLE_LIVE:
					/* Tuple is good --- but let's do some validity checks */
					if (onerel->rd_rel->relhasoids &&
						!OidIsValid(HeapTupleGetOid(&tuple)))
						elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
							 relname, blkno, offnum);
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must not remove it
					 * from relation.
					 */
					nkeep += 1;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					break;
			}

			if (tupgone)
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				tups_vacuumed += 1;
			}
			else
			{
				num_tuples += 1;
				hastup = true;

				/*
				 * Each non-removable tuple must be checked to see if it
				 * needs freezing.  If we already froze anything, then
				 * we've already switched the buffer lock to exclusive.
				 */
				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
									  (nfrozen > 0) ? InvalidBuffer : buf))
					frozen[nfrozen++] = offnum;
			}
		}						/* scan along page */

		/*
		 * If we froze any tuples, mark the buffer dirty, and write a WAL
		 * record recording the changes.  We must log the changes to be
		 * crash-safe against future truncation of CLOG.
		 */
		if (nfrozen > 0)
		{
			MarkBufferDirty(buf);
			/* no XLOG for temp tables, though */
			if (!onerel->rd_istemp)
			{
				XLogRecPtr	recptr;

				recptr = log_heap_freeze(onerel, buf, FreezeLimit,
										 frozen, nfrozen);
				PageSetLSN(page, recptr);
				PageSetTLI(page, ThisTimeLineID);
			}
		}

		/*
		 * If there are no indexes then we can vacuum the page right now
		 * instead of doing a second scan.
		 */
		if (nindexes == 0 &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Trade in buffer share lock for super-exclusive lock */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockBufferForCleanup(buf);

			/* Remove tuples from heap */
			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
			vacuumed_pages++;
		}

		/*
		 * If we remembered any tuples for deletion, then the page will be
		 * visited again by lazy_vacuum_heap, which will compute and record
		 * its post-compaction free space.	If not, then we're done with this
		 * page, so remember its free space as-is.	(This path will always be
		 * taken if there are no indexes.)
		 */
		if (vacrelstats->num_dead_tuples == prev_dead_count)
		{
			lazy_record_free_space(vacrelstats, blkno,
								   PageGetFreeSpace(page));
		}

		/* Remember the location of the last page with nonremovable tuples */
		if (hastup)
			vacrelstats->nonempty_pages = blkno + 1;

		UnlockReleaseBuffer(buf);

		MIRROREDLOCK_BUFMGR_UNLOCK;
		/* -------- MirroredLock ---------- */

	}

	/* save stats for use later */
	vacrelstats->rel_tuples = num_tuples;
	vacrelstats->tuples_deleted = tups_vacuumed;

	/* If any tuples need to be deleted, perform final vacuum cycle */
	/* XXX put a threshold on min number of tuples here? */
	if (vacrelstats->num_dead_tuples > 0)
	{
		/* Remove index entries */
		for (i = 0; i < nindexes; i++)
		{
			List *extra_oids = get_oids_for_bitmap(all_extra_oids, Irel[i],
												   onerel, reindex_count);

			lazy_vacuum_index(Irel[i],
							  &indstats[i],
							  vacrelstats,
							  extra_oids);
			list_free(extra_oids);
		}
		reindex_count++;

		/* Remove tuples from heap */
		lazy_vacuum_heap(onerel, vacrelstats);
	}

	/* Do post-vacuum cleanup and statistics update for each index */
	for (i = 0; i < nindexes; i++)
		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats, updated_stats);

	/* If no indexes, make log report that lazy_vacuum_heap would've made */
	if (vacuumed_pages)
		ereport(elevel,
				(errmsg("\"%s\": removed %.0f row versions in %u pages",
						RelationGetRelationName(onerel),
						tups_vacuumed, vacuumed_pages)));

	ereport(elevel,
			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
					RelationGetRelationName(onerel),
					tups_vacuumed, num_tuples, nblocks),
			 errdetail("%.0f dead row versions cannot be removed yet.\n"
					   "There were %.0f unused item pointers.\n"
					   "%u pages contain useful free space.\n"
					   "%u pages are entirely empty.\n"
					   "%s.",
					   nkeep,
					   nunused,
					   vacrelstats->tot_free_pages,
					   empty_pages,
					   pg_rusage_show(&ru0))));

	if (vacrelstats->tot_free_pages > MaxFSMPages)
		ereport(WARNING,
				(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
						get_namespace_name(RelationGetNamespace(onerel)),
						relname),
				 errhint("Consider compacting this relation or increasing the configuration parameter \"max_fsm_pages\".")));
}
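
The mid-scan pause condition near the top of the block loop is easy to misread: it fires before the dead-TID array can overflow, not after, by reserving room for a full page's worth of tuples. In isolation (illustrative names):

#include <stdbool.h>

/* Pause the heap scan for an index-vacuum cycle when the dead-TID array
 * might not be able to absorb one more page's worth of tuples. */
static bool
need_index_vacuum_cycle(long max_dead, long num_dead, int max_tuples_per_page)
{
    return (max_dead - num_dead) < max_tuples_per_page && num_dead > 0;
}
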
Example #18
Datum
ginvacuumcleanup(PG_FUNCTION_ARGS)
{
	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
	Relation	index = info->index;
	bool		needLock;
	BlockNumber npages,
				blkno;
	BlockNumber totFreePages;
	GinState	ginstate;
	GinStatsData idxStat;

	/*
	 * In an autovacuum analyze, we want to clean up pending insertions.
	 * Otherwise, an ANALYZE-only call is a no-op.
	 */
	if (info->analyze_only)
	{
		if (IsAutoVacuumWorkerProcess())
		{
			initGinState(&ginstate, index);
			ginInsertCleanup(&ginstate, true, stats);
		}
		PG_RETURN_POINTER(stats);
	}

	/*
	 * Set up all-zero stats and cleanup pending inserts if ginbulkdelete
	 * wasn't called
	 */
	if (stats == NULL)
	{
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
		initGinState(&ginstate, index);
		ginInsertCleanup(&ginstate, true, stats);
	}

	memset(&idxStat, 0, sizeof(idxStat));

	/*
	 * XXX we always report the heap tuple count as the number of index
	 * entries.  This is bogus if the index is partial, but it's real hard to
	 * tell how many distinct heap entries are referenced by a GIN index.
	 */
	stats->num_index_tuples = info->num_heap_tuples;
	stats->estimated_count = info->estimated_count;

	/*
	 * Need lock unless it's local to this backend.
	 */
	needLock = !RELATION_IS_LOCAL(index);

	if (needLock)
		LockRelationForExtension(index, ExclusiveLock);
	npages = RelationGetNumberOfBlocks(index);
	if (needLock)
		UnlockRelationForExtension(index, ExclusiveLock);

	totFreePages = 0;

	for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
	{
		Buffer		buffer;
		Page		page;

		vacuum_delay_point();

		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, info->strategy);
		LockBuffer(buffer, GIN_SHARE);
		page = (Page) BufferGetPage(buffer);

		if (GinPageIsDeleted(page))
		{
			Assert(blkno != GIN_ROOT_BLKNO);
			RecordFreeIndexPage(index, blkno);
			totFreePages++;
		}
		else if (GinPageIsData(page))
		{
			idxStat.nDataPages++;
		}
		else if (!GinPageIsList(page))
		{
			idxStat.nEntryPages++;

			if (GinPageIsLeaf(page))
				idxStat.nEntries += PageGetMaxOffsetNumber(page);
		}

		UnlockReleaseBuffer(buffer);
	}

	/* Update the metapage with accurate page and entry counts */
	idxStat.nTotalPages = npages;
	ginUpdateStats(info->index, &idxStat);

	/* Finally, vacuum the FSM */
	IndexFreeSpaceMapVacuum(info->index);

	stats->pages_free = totFreePages;

	if (needLock)
		LockRelationForExtension(index, ExclusiveLock);
	stats->num_pages = RelationGetNumberOfBlocks(index);
	if (needLock)
		UnlockRelationForExtension(index, ExclusiveLock);

	PG_RETURN_POINTER(stats);
}
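
The per-page bookkeeping in the scan loop distinguishes four page kinds. A compact restatement of that classification with an illustrative enum (the real code tests the GinPageIs* macros directly):

#include <stdbool.h>

typedef enum { GIN_PG_FREE, GIN_PG_DATA, GIN_PG_ENTRY, GIN_PG_LIST } GinPageKind;

/* Mirror of the classification order above: deleted pages are recyclable,
 * data pages belong to posting trees, other non-list pages are entry
 * pages, and pending-list pages are counted in none of the statistics. */
static GinPageKind
classify_gin_page(bool deleted, bool is_data, bool is_list)
{
    if (deleted)
        return GIN_PG_FREE;
    if (is_data)
        return GIN_PG_DATA;
    if (!is_list)
        return GIN_PG_ENTRY;
    return GIN_PG_LIST;
}
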
Example #19
static Datum
gp_aovisimap_internal(PG_FUNCTION_ARGS, Oid aoRelOid)
{
	Datum		values[3];
	bool		nulls[3];
	HeapTuple tuple;
	Datum result;

	typedef struct Context
	{
		Relation aorel;
		AppendOnlyVisimapScan visiMapScan;
		AOTupleId aoTupleId;
		
	} Context;
	
	FuncCallContext *funcctx;
	Context *context;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;
		
		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function
		 * calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		tupdesc = CreateTemplateTupleDesc(3, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tid",
						   TIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "segno",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "row_num",
						   INT8OID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Set up the visimap scan whose entries we will format and
		 * send out as a result set.
		 */
		context = (Context *) palloc0(sizeof(Context));

		context->aorel = heap_open(aoRelOid, AccessShareLock);
		if (!(RelationIsAoRows(context->aorel) || RelationIsAoCols(context->aorel)))
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("Function not supported on relation")));
		}

		AppendOnlyVisimapScan_Init(&context->visiMapScan,
				context->aorel->rd_appendonly->visimaprelid,
				context->aorel->rd_appendonly->visimapidxid,
				AccessShareLock,
				SnapshotNow);
		AOTupleIdInit_Init(&context->aoTupleId);

		funcctx->user_fctx = (void *) context;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (Context *) funcctx->user_fctx;

	while (true)
	{
		if (!AppendOnlyVisimapScan_GetNextInvisible(
				&context->visiMapScan,
				&context->aoTupleId))
		{
			break;
		}
		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));
		values[0] = ItemPointerGetDatum((ItemPointer)&context->aoTupleId);
		values[1] = Int32GetDatum(AOTupleIdGet_segmentFileNum(&context->aoTupleId));
		values[2] = Int64GetDatum(AOTupleIdGet_rowNum(&context->aoTupleId));

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	
	AppendOnlyVisimapScan_Finish(&context->visiMapScan, AccessShareLock);
	heap_close(context->aorel, AccessShareLock);
	pfree(context);
	funcctx->user_fctx = NULL;
	SRF_RETURN_DONE(funcctx);
}
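
The function above follows the standard two-phase set-returning-function (SRF) protocol: the first call allocates long-lived state, every call streams one row, and the final call cleans up. A minimal skeleton of that protocol, offered as a sketch rather than a drop-in function (it returns the integers 1..3):

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"

PG_FUNCTION_INFO_V1(my_srf);

Datum
my_srf(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;

    if (SRF_IS_FIRSTCALL())
    {
        MemoryContext oldcontext;

        funcctx = SRF_FIRSTCALL_INIT();
        /* per-query state must live in multi_call_memory_ctx */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
        funcctx->user_fctx = palloc0(sizeof(int));   /* a row counter */
        MemoryContextSwitchTo(oldcontext);
    }

    funcctx = SRF_PERCALL_SETUP();

    {
        int *counter = (int *) funcctx->user_fctx;

        if (*counter < 3)
        {
            (*counter)++;
            SRF_RETURN_NEXT(funcctx, Int32GetDatum(*counter));
        }
    }

    SRF_RETURN_DONE(funcctx);
}
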
Example #20
/*
 * get_relation_info -
 *	  Retrieves catalog information for a given relation.
 *
 * Given the Oid of the relation, return the following info into fields
 * of the RelOptInfo struct:
 *
 *	min_attr	lowest valid AttrNumber
 *	max_attr	highest valid AttrNumber
 *	indexlist	list of IndexOptInfos for relation's indexes
 *	pages		number of pages
 *	tuples		number of tuples
 *
 * Also, initialize the attr_needed[] and attr_widths[] arrays.  In most
 * cases these are left as zeroes, but sometimes we need to compute attr
 * widths here, and we may as well cache the results for costsize.c.
 *
 * If inhparent is true, all we need to do is set up the attr arrays:
 * the RelOptInfo actually represents the appendrel formed by an inheritance
 * tree, and so the parent rel's physical size and index information isn't
 * important for it.
 */
void
get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
				  RelOptInfo *rel)
{
	Index		varno = rel->relid;
	Relation	relation;
	bool		hasindex;
	List	   *indexinfos = NIL;
	bool		needs_longlock;

	/*
	 * We need not lock the relation since it was already locked, either by
	 * the rewriter or when expand_inherited_rtentry() added it to the query's
	 * rangetable.
	 */
	relation = heap_open(relationObjectId, NoLock);
	needs_longlock = rel_needs_long_lock(relationObjectId);

	rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1;
	rel->max_attr = RelationGetNumberOfAttributes(relation);

	Assert(rel->max_attr >= rel->min_attr);
	rel->attr_needed = (Relids *)
		palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids));
	rel->attr_widths = (int32 *)
		palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32));

    /*
     * CDB: Get partitioning key info for distributed relation.
     */
    rel->cdbpolicy = RelationGetPartitioningKey(relation);

	/*
	 * Estimate relation size --- unless it's an inheritance parent, in which
	 * case the size will be computed later in set_append_rel_pathlist, and we
	 * must leave it zero for now to avoid bollixing the total_table_pages
	 * calculation.
	 */
	if (!inhparent)
	{
		cdb_estimate_rel_size(
			rel,
			relation,
			relation,
			rel->attr_widths - rel->min_attr,
			&rel->pages,
			&rel->tuples,
			&rel->cdb_default_stats_used
			);
	}

	/*
	 * Make list of indexes.  Ignore indexes on system catalogs if told to.
	 * Don't bother with indexes for an inheritance parent, either.
	 */
	if (inhparent ||
		(IgnoreSystemIndexes && IsSystemClass(relation->rd_rel)))
		hasindex = false;
	else
		hasindex = relation->rd_rel->relhasindex;

	if (hasindex)
	{
		List	   *indexoidlist;
		ListCell   *l;
		LOCKMODE	lmode;

        /* Warn if indexed table needs ANALYZE. */
        if (rel->cdb_default_stats_used)
            cdb_default_stats_warning_for_table(relation->rd_id);

		indexoidlist = RelationGetIndexList(relation);

		/*
		 * For each index, we get the same type of lock that the executor will
		 * need, and do not release it.  This saves a couple of trips to the
		 * shared lock manager while not creating any real loss of
		 * concurrency, because no schema changes could be happening on the
		 * index while we hold lock on the parent rel, and neither lock type
		 * blocks any other kind of index operation.
		 */
		if (rel->relid == root->parse->resultRelation)
			lmode = RowExclusiveLock;
		else
			lmode = AccessShareLock;

		foreach(l, indexoidlist)
		{
			Oid			indexoid = lfirst_oid(l);
			Relation	indexRelation;
			Form_pg_index index;
			IndexOptInfo *info;
			int			ncolumns;
			int			i;

			/*
			 * Extract info from the relation descriptor for the index.
			 */
			indexRelation = index_open(indexoid, lmode);
			index = indexRelation->rd_index;

			/*
			 * Ignore invalid indexes, since they can't safely be used for
			 * queries.  Note that this is OK because the data structure we
			 * are constructing is only used by the planner --- the executor
			 * still needs to insert into "invalid" indexes, if they're marked
			 * IndexIsReady.
			 */
			if (!IndexIsValid(index))
			{
				index_close(indexRelation, NoLock);
				continue;
			}

			/*
			 * If the index is valid, but cannot yet be used, ignore it; but
			 * mark the plan we are generating as transient. See
			 * src/backend/access/heap/README.HOT for discussion.
			 */
			if (index->indcheckxmin &&
				!TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data),
									   TransactionXmin))
			{
				root->glob->transientPlan = true;
				index_close(indexRelation, NoLock);
				continue;
			}

			info = makeNode(IndexOptInfo);

			info->indexoid = index->indexrelid;
			info->rel = rel;
			info->ncolumns = ncolumns = index->indnatts;

			/*
			 * Allocate per-column info arrays.  To save a few palloc cycles
			 * we allocate all the Oid-type arrays in one request.	Note that
			 * the opfamily array needs an extra, terminating zero at the end.
			 * We pre-zero the ordering info in case the index is unordered.
			 */
			info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
			info->opfamily = (Oid *) palloc0(sizeof(Oid) * (4 * ncolumns + 1));
			info->opcintype = info->opfamily + (ncolumns + 1);
			info->fwdsortop = info->opcintype + ncolumns;
			info->revsortop = info->fwdsortop + ncolumns;
			info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns);

			for (i = 0; i < ncolumns; i++)
			{
				info->indexkeys[i] = index->indkey.values[i];
				info->opfamily[i] = indexRelation->rd_opfamily[i];
				info->opcintype[i] = indexRelation->rd_opcintype[i];
			}

			info->relam = indexRelation->rd_rel->relam;
			info->amcostestimate = indexRelation->rd_am->amcostestimate;
			info->amoptionalkey = indexRelation->rd_am->amoptionalkey;
			info->amsearchnulls = indexRelation->rd_am->amsearchnulls;

			/*
			 * Fetch the ordering operators associated with the index, if any.
			 * We expect that all ordering-capable indexes use btree's
			 * strategy numbers for the ordering operators.
			 */
			if (indexRelation->rd_am->amcanorder)
			{
				int			nstrat = indexRelation->rd_am->amstrategies;

				for (i = 0; i < ncolumns; i++)
				{
					int16		opt = indexRelation->rd_indoption[i];
					int			fwdstrat;
					int			revstrat;

					if (opt & INDOPTION_DESC)
					{
						fwdstrat = BTGreaterStrategyNumber;
						revstrat = BTLessStrategyNumber;
					}
					else
					{
						fwdstrat = BTLessStrategyNumber;
						revstrat = BTGreaterStrategyNumber;
					}

					/*
					 * Index AM must have a fixed set of strategies for it to
					 * make sense to specify amcanorder, so we need not allow
					 * the case amstrategies == 0.
					 */
					if (fwdstrat > 0)
					{
						Assert(fwdstrat <= nstrat);
						info->fwdsortop[i] = indexRelation->rd_operator[i * nstrat + fwdstrat - 1];
					}
					if (revstrat > 0)
					{
						Assert(revstrat <= nstrat);
						info->revsortop[i] = indexRelation->rd_operator[i * nstrat + revstrat - 1];
					}
					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
				}
			}

			/*
			 * Fetch the index expressions and predicate, if any.  We must
			 * modify the copies we obtain from the relcache to have the
			 * correct varno for the parent relation, so that they match up
			 * correctly against qual clauses.
			 */
			info->indexprs = RelationGetIndexExpressions(indexRelation);
			info->indpred = RelationGetIndexPredicate(indexRelation);
			if (info->indexprs && varno != 1)
				ChangeVarNodes((Node *) info->indexprs, 1, varno, 0);
			if (info->indpred && varno != 1)
				ChangeVarNodes((Node *) info->indpred, 1, varno, 0);
			info->predOK = false;		/* set later in indxpath.c */
			info->unique = index->indisunique;

			/*
			 * Estimate the index size.  If it's not a partial index, we lock
			 * the number-of-tuples estimate to equal the parent table; if it
			 * is partial then we have to use the same methods as we would for
			 * a table, except we can be sure that the index is not larger
			 * than the table.
			 */
			cdb_estimate_rel_size(rel,
                                  relation,
                                  indexRelation,
                                  NULL,
                                  &info->pages,
                                  &info->tuples,
                                  &info->cdb_default_stats_used);

			if (!info->indpred ||
				info->tuples > rel->tuples)
				info->tuples = rel->tuples;

            if (info->cdb_default_stats_used &&
                !rel->cdb_default_stats_used)
                cdb_default_stats_warning_for_index(relation->rd_id, indexoid);

			index_close(indexRelation, needs_longlock ? NoLock : lmode);

			indexinfos = lcons(info, indexinfos);
		}

		list_free(indexoidlist);
	}
Example #21
Datum
crosstab(PG_FUNCTION_ARGS)
{
	char	   *sql = text_to_cstring(PG_GETARG_TEXT_PP(0));
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	Tuplestorestate *tupstore;
	TupleDesc	tupdesc;
	int			call_cntr;
	int			max_calls;
	AttInMetadata *attinmeta;
	SPITupleTable *spi_tuptable;
	TupleDesc	spi_tupdesc;
	bool		firstpass;
	char	   *lastrowid;
	int			i;
	int			num_categories;
	MemoryContext per_query_ctx;
	MemoryContext oldcontext;
	int			ret;
	int			proc;

	/* check to see if caller supports us returning a tuplestore */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));
	if (!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not " \
						"allowed in this context")));

	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;

	/* Connect to SPI manager */
	if ((ret = SPI_connect()) < 0)
		/* internal error */
		elog(ERROR, "crosstab: SPI_connect returned %d", ret);

	/* Retrieve the desired rows */
	ret = SPI_execute(sql, true, 0);
	proc = SPI_processed;

	/* If no qualifying tuples, fall out early */
	if (ret != SPI_OK_SELECT || proc <= 0)
	{
		SPI_finish();
		rsinfo->isDone = ExprEndResult;
		PG_RETURN_NULL();
	}

	spi_tuptable = SPI_tuptable;
	spi_tupdesc = spi_tuptable->tupdesc;

	/*----------
	 * The provided SQL query must always return three columns.
	 *
	 * 1. rowname
	 *	the label or identifier for each row in the final result
	 * 2. category
	 *	the label or identifier for each column in the final result
	 * 3. values
	 *	the value for each column in the final result
	 *----------
	 */
	if (spi_tupdesc->natts != 3)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid source data SQL statement"),
				 errdetail("The provided SQL must return 3 "
						   "columns: rowid, category, and values.")));

	/* get a tuple descriptor for our result type */
	switch (get_call_result_type(fcinfo, NULL, &tupdesc))
	{
		case TYPEFUNC_COMPOSITE:
			/* success */
			break;
		case TYPEFUNC_RECORD:
			/* failed to determine actual type of RECORD */
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("function returning record called in context "
							"that cannot accept type record")));
			break;
		default:
			/* result type isn't composite */
			elog(ERROR, "return type must be a row type");
			break;
	}

	/*
	 * Check that return tupdesc is compatible with the data we got from SPI,
	 * at least based on number and type of attributes
	 */
	if (!compatCrosstabTupleDescs(tupdesc, spi_tupdesc))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("return and sql tuple descriptions are " \
						"incompatible")));

	/*
	 * switch to long-lived memory context
	 */
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	/* make sure we have a persistent copy of the result tupdesc */
	tupdesc = CreateTupleDescCopy(tupdesc);

	/* initialize our tuplestore in long-lived context */
	tupstore =
		tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random,
							  false, work_mem);

	MemoryContextSwitchTo(oldcontext);

	/*
	 * Generate attribute metadata needed later to produce tuples from raw C
	 * strings
	 */
	attinmeta = TupleDescGetAttInMetadata(tupdesc);

	/* total number of tuples to be examined */
	max_calls = proc;

	/* the return tuple always must have 1 rowid + num_categories columns */
	num_categories = tupdesc->natts - 1;

	firstpass = true;
	lastrowid = NULL;

	for (call_cntr = 0; call_cntr < max_calls; call_cntr++)
	{
		bool		skip_tuple = false;
		char	  **values;

		/* allocate and zero space */
		values = (char **) palloc0((1 + num_categories) * sizeof(char *));

		/*
		 * now loop through the sql results and assign each value in sequence
		 * to the next category
		 */
		for (i = 0; i < num_categories; i++)
		{
			HeapTuple	spi_tuple;
			char	   *rowid;

			/* see if we've gone too far already */
			if (call_cntr >= max_calls)
				break;

			/* get the next sql result tuple */
			spi_tuple = spi_tuptable->vals[call_cntr];

			/* get the rowid from the current sql result tuple */
			rowid = SPI_getvalue(spi_tuple, spi_tupdesc, 1);

			/*
			 * If this is the first pass through the values for this rowid,
			 * set the first column to rowid
			 */
			if (i == 0)
			{
				xpstrdup(values[0], rowid);

				/*
				 * Check to see if the rowid is the same as that of the last
				 * tuple sent -- if so, skip this tuple entirely
				 */
				if (!firstpass && xstreq(lastrowid, rowid))
				{
					xpfree(rowid);
					skip_tuple = true;
					break;
				}
			}

			/*
			 * If rowid hasn't changed on us, continue building the output
			 * tuple.
			 */
			if (xstreq(rowid, values[0]))
			{
				/*
				 * Get the next category item value, which is always attribute
				 * number three.
				 *
				 * Be careful to assign the value to the array index based on
				 * which category we are presently processing.
				 */
				values[1 + i] = SPI_getvalue(spi_tuple, spi_tupdesc, 3);

				/*
				 * increment the counter since we consume a row for each
				 * category, but not for last pass because the outer loop will
				 * do that for us
				 */
				if (i < (num_categories - 1))
					call_cntr++;
				xpfree(rowid);
			}
			else
			{
				/*
				 * We'll fill in NULLs for the missing values, but we need to
				 * decrement the counter since this sql result row doesn't
				 * belong to the current output tuple.
				 */
				call_cntr--;
				xpfree(rowid);
				break;
			}
		}

		if (!skip_tuple)
		{
			HeapTuple	tuple;

			/* build the tuple and store it */
			tuple = BuildTupleFromCStrings(attinmeta, values);
			tuplestore_puttuple(tupstore, tuple);
			heap_freetuple(tuple);
		}

		/* Remember current rowid */
		xpfree(lastrowid);
		xpstrdup(lastrowid, values[0]);
		firstpass = false;

		/* Clean up */
		for (i = 0; i < num_categories + 1; i++)
			if (values[i] != NULL)
				pfree(values[i]);
		pfree(values);
	}

	/* let the caller know we're sending back a tuplestore */
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setResult = tupstore;
	rsinfo->setDesc = tupdesc;

	/* release SPI related resources (and return to caller's context) */
	SPI_finish();

	return (Datum) 0;
}
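
The loop above leans on three NULL-tolerant helpers, xpfree, xpstrdup and xstreq, that are not shown in this excerpt. In contrib/tablefunc they are macros along these lines (reproduced from memory, so treat the exact bodies as a sketch):

/* free a palloc'd string and reset the pointer, tolerating NULL */
#define xpfree(var_) \
    do { \
        if (var_ != NULL) \
        { \
            pfree(var_); \
            var_ = NULL; \
        } \
    } while (0)

/* duplicate a string into the current memory context, tolerating NULL */
#define xpstrdup(tgtvar_, srcvar_) \
    do { \
        if (srcvar_) \
            tgtvar_ = pstrdup(srcvar_); \
        else \
            tgtvar_ = NULL; \
    } while (0)

/* string equality where two NULLs compare equal */
#define xstreq(tgtvar_, srcvar_) \
    (((tgtvar_ == NULL) && (srcvar_ == NULL)) || \
     ((tgtvar_ != NULL) && (srcvar_ != NULL) && \
      (strcmp(tgtvar_, srcvar_) == 0)))
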
Example #22
/*
 *		ConstructTupleDescriptor
 *
 * Build an index tuple descriptor for a new index
 */
static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,
						 IndexInfo *indexInfo,
						 Oid *classObjectId)
{
	int			numatts = indexInfo->ii_NumIndexAttrs;
	List	   *indexprs = indexInfo->ii_Expressions;
	TupleDesc	heapTupDesc;
	TupleDesc	indexTupDesc;
	int			natts;			/* #atts in heap rel --- for error checks */
	int			i;

	heapTupDesc = RelationGetDescr(heapRelation);
	natts = RelationGetForm(heapRelation)->relnatts;

	/*
	 * allocate the new tuple descriptor
	 */
	indexTupDesc = CreateTemplateTupleDesc(numatts, false);

	/*
	 * For simple index columns, we copy the pg_attribute row from the
	 * parent relation and modify it as necessary.	For expressions we
	 * have to cons up a pg_attribute row the hard way.
	 */
	for (i = 0; i < numatts; i++)
	{
		AttrNumber	atnum = indexInfo->ii_KeyAttrNumbers[i];
		Form_pg_attribute to;
		HeapTuple	tuple;
		Form_pg_type typeTup;
		Oid			keyType;

		indexTupDesc->attrs[i] = to =
			(Form_pg_attribute) palloc0(ATTRIBUTE_TUPLE_SIZE);

		if (atnum != 0)
		{
			/* Simple index column */
			Form_pg_attribute from;

			if (atnum < 0)
			{
				/*
				 * here we are indexing on a system attribute (-1...-n)
				 */
				from = SystemAttributeDefinition(atnum,
									   heapRelation->rd_rel->relhasoids);
			}
			else
			{
				/*
				 * here we are indexing on a normal attribute (1...n)
				 */
				if (atnum > natts)		/* safety check */
					elog(ERROR, "invalid column number %d", atnum);
				from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
			}

			/*
			 * now that we've determined the "from", let's copy the tuple
			 * desc data...
			 */
			memcpy(to, from, ATTRIBUTE_TUPLE_SIZE);

			/*
			 * Fix the stuff that should not be the same as the underlying
			 * attr
			 */
			to->attnum = i + 1;

			to->attstattarget = 0;
			to->attcacheoff = -1;
			to->attnotnull = false;
			to->atthasdef = false;
			to->attislocal = true;
			to->attinhcount = 0;
		}
		else
		{
			/* Expressional index */
			Node	   *indexkey;

			if (indexprs == NIL)	/* shouldn't happen */
				elog(ERROR, "too few entries in indexprs list");
			indexkey = (Node *) lfirst(indexprs);
			indexprs = lnext(indexprs);

			/*
			 * Make the attribute's name "pg_expression_nnn" (maybe think
			 * of something better later)
			 */
			sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);

			/*
			 * Lookup the expression type in pg_type for the type length
			 * etc.
			 */
			keyType = exprType(indexkey);
			tuple = SearchSysCache(TYPEOID,
								   ObjectIdGetDatum(keyType),
								   0, 0, 0);
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			/*
			 * Assign some of the attributes values. Leave the rest as 0.
			 */
			to->attnum = i + 1;
			to->atttypid = keyType;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attstorage = typeTup->typstorage;
			to->attalign = typeTup->typalign;
			to->attcacheoff = -1;
			to->atttypmod = -1;
			to->attislocal = true;

			ReleaseSysCache(tuple);
		}

		/*
		 * We do not yet have the correct relation OID for the index, so
		 * just set it invalid for now.  InitializeAttributeOids() will
		 * fix it later.
		 */
		to->attrelid = InvalidOid;

		/*
		 * Check the opclass to see if it provides a keytype (overriding
		 * the attribute type).
		 */
		tuple = SearchSysCache(CLAOID,
							   ObjectIdGetDatum(classObjectId[i]),
							   0, 0, 0);
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for opclass %u",
				 classObjectId[i]);
		keyType = ((Form_pg_opclass) GETSTRUCT(tuple))->opckeytype;
		ReleaseSysCache(tuple);

		if (OidIsValid(keyType) && keyType != to->atttypid)
		{
			/* index value and heap value have different types */
			tuple = SearchSysCache(TYPEOID,
								   ObjectIdGetDatum(keyType),
								   0, 0, 0);
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			to->atttypid = keyType;
			to->atttypmod = -1;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attalign = typeTup->typalign;
			to->attstorage = typeTup->typstorage;

			ReleaseSysCache(tuple);
		}
	}

	return indexTupDesc;
}
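
Two small conventions carry weight in the loop above: attribute numbers are 1-based while tuple-descriptor arrays are 0-based, and system attributes are negative. The offset mapping is essentially the following one-liner (the real definition in access/attnum.h also asserts that the attribute number is user-defined):

/* Map a 1-based AttrNumber to a 0-based array offset (assertion omitted). */
#define AttrNumberGetAttrOffset(attNum)  ((attNum) - 1)
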
Example #23
/*
 * Look to see if we have template information for the given language name.
 */
static PLTemplate *
find_language_template(const char *languageName)
{
	PLTemplate *result;
	Relation	rel;
	SysScanDesc scan;
	ScanKeyData key;
	HeapTuple	tup;

	rel = heap_open(PLTemplateRelationId, AccessShareLock);

	ScanKeyInit(&key,
				Anum_pg_pltemplate_tmplname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(languageName));
	scan = systable_beginscan(rel, PLTemplateNameIndexId, true,
							  SnapshotNow, 1, &key);

	tup = systable_getnext(scan);
	if (HeapTupleIsValid(tup))
	{
		Form_pg_pltemplate tmpl = (Form_pg_pltemplate) GETSTRUCT(tup);
		Datum		datum;
		bool		isnull;

		result = (PLTemplate *) palloc0(sizeof(PLTemplate));
		result->tmpltrusted = tmpl->tmpltrusted;
		result->tmpldbacreate = tmpl->tmpldbacreate;

		/* Remaining fields are variable-width so we need heap_getattr */
		datum = heap_getattr(tup, Anum_pg_pltemplate_tmplhandler,
							 RelationGetDescr(rel), &isnull);
		if (!isnull)
			result->tmplhandler = TextDatumGetCString(datum);

		datum = heap_getattr(tup, Anum_pg_pltemplate_tmplinline,
							 RelationGetDescr(rel), &isnull);
		if (!isnull)
			result->tmplinline = TextDatumGetCString(datum);

		datum = heap_getattr(tup, Anum_pg_pltemplate_tmplvalidator,
							 RelationGetDescr(rel), &isnull);
		if (!isnull)
			result->tmplvalidator = TextDatumGetCString(datum);

		datum = heap_getattr(tup, Anum_pg_pltemplate_tmpllibrary,
							 RelationGetDescr(rel), &isnull);
		if (!isnull)
			result->tmpllibrary = TextDatumGetCString(datum);

		/* Ignore template if handler or library info is missing */
		if (!result->tmplhandler || !result->tmpllibrary)
			result = NULL;
	}
	else
		result = NULL;

	systable_endscan(scan);

	heap_close(rel, AccessShareLock);

	return result;
}
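
A caller can rely on the contract that a non-NULL result always has both a handler and a library, since the function discards templates missing either. A hedged usage sketch (require_language_template and the error wording are illustrative, not part of the source):

static void
require_language_template(const char *languageName)
{
    PLTemplate *tmpl = find_language_template(languageName);

    if (tmpl == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("unsupported language \"%s\"", languageName)));

    /* tmpl->tmplhandler and tmpl->tmpllibrary are non-NULL here; the
     * inline handler and validator may still be NULL. */
}
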
Example #24
/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
hashbulkdelete(PG_FUNCTION_ARGS)
{
	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
	void	   *callback_state = (void *) PG_GETARG_POINTER(3);
	Relation	rel = info->index;
	double		tuples_removed;
	double		num_index_tuples;
	double		orig_ntuples;
	Bucket		orig_maxbucket;
	Bucket		cur_maxbucket;
	Bucket		cur_bucket;
	Buffer		metabuf;
	HashMetaPage metap;
	HashMetaPageData local_metapage;

	tuples_removed = 0;
	num_index_tuples = 0;

	/*
	 * Read the metapage to fetch original bucket and tuple counts.  Also, we
	 * keep a copy of the last-seen metapage so that we can use its
	 * hashm_spares[] values to compute bucket page addresses.	This is a bit
	 * hokey but perfectly safe, since the interesting entries in the spares
	 * array cannot change under us; and it beats rereading the metapage for
	 * each bucket.
	 */
	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
	metap = HashPageGetMeta(BufferGetPage(metabuf));
	orig_maxbucket = metap->hashm_maxbucket;
	orig_ntuples = metap->hashm_ntuples;
	memcpy(&local_metapage, metap, sizeof(local_metapage));
	_hash_relbuf(rel, metabuf);

	/* Scan the buckets that we know exist */
	cur_bucket = 0;
	cur_maxbucket = orig_maxbucket;

loop_top:
	while (cur_bucket <= cur_maxbucket)
	{
		BlockNumber bucket_blkno;
		BlockNumber blkno;
		bool		bucket_dirty = false;

		/* Get address of bucket's start page */
		bucket_blkno = BUCKET_TO_BLKNO(&local_metapage, cur_bucket);

		/* Exclusive-lock the bucket so we can shrink it */
		_hash_getlock(rel, bucket_blkno, HASH_EXCLUSIVE);

		/* Shouldn't have any active scans locally, either */
		if (_hash_has_active_scan(rel, cur_bucket))
			elog(ERROR, "hash index has active scan during VACUUM");

		/* Scan each page in bucket */
		blkno = bucket_blkno;
		while (BlockNumberIsValid(blkno))
		{
			Buffer		buf;
			Page		page;
			HashPageOpaque opaque;
			OffsetNumber offno;
			OffsetNumber maxoffno;
			OffsetNumber deletable[MaxOffsetNumber];
			int			ndeletable = 0;

			vacuum_delay_point();

			buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
										   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
											 info->strategy);
			page = BufferGetPage(buf);
			opaque = (HashPageOpaque) PageGetSpecialPointer(page);
			Assert(opaque->hasho_bucket == cur_bucket);

			/* Scan each tuple in page */
			maxoffno = PageGetMaxOffsetNumber(page);
			for (offno = FirstOffsetNumber;
				 offno <= maxoffno;
				 offno = OffsetNumberNext(offno))
			{
				IndexTuple	itup;
				ItemPointer htup;

				itup = (IndexTuple) PageGetItem(page,
												PageGetItemId(page, offno));
				htup = &(itup->t_tid);
				if (callback(htup, callback_state))
				{
					/* mark the item for deletion */
					deletable[ndeletable++] = offno;
					tuples_removed += 1;
				}
				else
					num_index_tuples += 1;
			}

			/*
			 * Apply deletions and write page if needed, advance to next page.
			 */
			blkno = opaque->hasho_nextblkno;

			if (ndeletable > 0)
			{
				PageIndexMultiDelete(page, deletable, ndeletable);
				_hash_wrtbuf(rel, buf);
				bucket_dirty = true;
			}
			else
				_hash_relbuf(rel, buf);
		}

		/* If we deleted anything, try to compact free space */
		if (bucket_dirty)
			_hash_squeezebucket(rel, cur_bucket, bucket_blkno,
								info->strategy);

		/* Release bucket lock */
		_hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);

		/* Advance to next bucket */
		cur_bucket++;
	}

	/* Write-lock metapage and check for split since we started */
	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE, LH_META_PAGE);
	metap = HashPageGetMeta(BufferGetPage(metabuf));

	if (cur_maxbucket != metap->hashm_maxbucket)
	{
		/* There's been a split, so process the additional bucket(s) */
		cur_maxbucket = metap->hashm_maxbucket;
		memcpy(&local_metapage, metap, sizeof(local_metapage));
		_hash_relbuf(rel, metabuf);
		goto loop_top;
	}

	/* Okay, we're really done.  Update tuple count in metapage. */

	if (orig_maxbucket == metap->hashm_maxbucket &&
		orig_ntuples == metap->hashm_ntuples)
	{
		/*
		 * No one has split or inserted anything since start of scan, so
		 * believe our count as gospel.
		 */
		metap->hashm_ntuples = num_index_tuples;
	}
	else
	{
		/*
		 * Otherwise, our count is untrustworthy since we may have
		 * double-scanned tuples in split buckets.	Proceed by dead-reckoning.
		 * (Note: we still return estimated_count = false, because using this
		 * count is better than not updating reltuples at all.)
		 */
		if (metap->hashm_ntuples > tuples_removed)
			metap->hashm_ntuples -= tuples_removed;
		else
			metap->hashm_ntuples = 0;
		num_index_tuples = metap->hashm_ntuples;
	}

	_hash_wrtbuf(rel, metabuf);

	/* return statistics */
	if (stats == NULL)
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
	stats->estimated_count = false;
	stats->num_index_tuples = num_index_tuples;
	stats->tuples_removed += tuples_removed;
	/* hashvacuumcleanup will fill in num_pages */

	PG_RETURN_POINTER(stats);
}
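
The goto loop_top retry above implements a simple pattern: scan up to the bucket count observed at the start, re-read the metapage, and if a concurrent split has added buckets, extend the scan to cover only the new ones. The same control flow without the goto, as a sketch with hypothetical helpers (read_bucket_count and vacuum_bucket are not hash-AM functions):

#include <stdint.h>

uint32_t read_bucket_count(void);     /* hypothetical: reads the metapage */
void     vacuum_bucket(uint32_t b);   /* hypothetical: cleans one bucket */

static void
vacuum_all_buckets(void)
{
    uint32_t next = 0;
    uint32_t limit = read_bucket_count();

    for (;;)
    {
        while (next < limit)
            vacuum_bucket(next++);

        uint32_t now = read_bucket_count();
        if (now == limit)
            break;      /* no split since the last check: done */
        limit = now;    /* a split added buckets: scan the new ones too */
    }
}
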
Example #25
/*
 * CompactCheckpointerRequestQueue
 *		Remove duplicates from the request queue to avoid backend fsyncs.
 *		Returns "true" if any entries were removed.
 *
 * Although a full fsync request queue is not common, it can lead to severe
 * performance problems when it does happen.  So far, this situation has
 * only been observed to occur when the system is under heavy write load,
 * and especially during the "sync" phase of a checkpoint.  Without this
 * logic, each backend begins doing an fsync for every block written, which
 * gets very expensive and can slow down the whole system.
 *
 * Trying to do this every time the queue is full could lose if there
 * aren't any removable entries.  But that should be vanishingly rare in
 * practice: there's one queue entry per shared buffer.
 */
static bool
CompactCheckpointerRequestQueue(void)
{
	struct CheckpointerSlotMapping
	{
		CheckpointerRequest request;
		int			slot;
	};

	int			n,
				preserve_count;
	int			num_skipped = 0;
	HASHCTL		ctl;
	HTAB	   *htab;
	bool	   *skip_slot;

	/* must hold CheckpointerCommLock in exclusive mode */
	Assert(LWLockHeldByMe(CheckpointerCommLock));

	/* Initialize skip_slot array */
	skip_slot = palloc0(sizeof(bool) * CheckpointerShmem->num_requests);

	/* Initialize temporary hash table */
	MemSet(&ctl, 0, sizeof(ctl));
	ctl.keysize = sizeof(CheckpointerRequest);
	ctl.entrysize = sizeof(struct CheckpointerSlotMapping);
	ctl.hcxt = CurrentMemoryContext;

	htab = hash_create("CompactCheckpointerRequestQueue",
					   CheckpointerShmem->num_requests,
					   &ctl,
					   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);

	/*
	 * The basic idea here is that a request can be skipped if it's followed
	 * by a later, identical request.  It might seem more sensible to work
	 * backwards from the end of the queue and check whether a request is
	 * *preceded* by an earlier, identical request, in the hopes of doing less
	 * copying.  But that might change the semantics, if there's an
	 * intervening FORGET_RELATION_FSYNC or FORGET_DATABASE_FSYNC request, so
	 * we do it this way.  It would be possible to be even smarter if we made
	 * the code below understand the specific semantics of such requests (it
	 * could blow away preceding entries that would end up being canceled
	 * anyhow), but it's not clear that the extra complexity would buy us
	 * anything.
	 */
	for (n = 0; n < CheckpointerShmem->num_requests; n++)
	{
		CheckpointerRequest *request;
		struct CheckpointerSlotMapping *slotmap;
		bool		found;

		/*
		 * We use the request struct directly as a hashtable key.  This
		 * assumes that any padding bytes in the structs are consistently the
		 * same, which should be okay because we zeroed them in
		 * CheckpointerShmemInit.  Note also that RelFileNode had better
		 * contain no pad bytes.
		 */
		request = &CheckpointerShmem->requests[n];
		slotmap = hash_search(htab, request, HASH_ENTER, &found);
		if (found)
		{
			/* Duplicate, so mark the previous occurrence as skippable */
			skip_slot[slotmap->slot] = true;
			num_skipped++;
		}
		/* Remember slot containing latest occurrence of this request value */
		slotmap->slot = n;
	}

	/* Done with the hash table. */
	hash_destroy(htab);

	/* If no duplicates, we're out of luck. */
	if (!num_skipped)
	{
		pfree(skip_slot);
		return false;
	}

	/* We found some duplicates; remove them. */
	preserve_count = 0;
	for (n = 0; n < CheckpointerShmem->num_requests; n++)
	{
		if (skip_slot[n])
			continue;
		CheckpointerShmem->requests[preserve_count++] = CheckpointerShmem->requests[n];
	}
	ereport(DEBUG1,
			(errmsg("compacted fsync request queue from %d entries to %d entries",
					CheckpointerShmem->num_requests, preserve_count)));
	CheckpointerShmem->num_requests = preserve_count;

	/* Cleanup. */
	pfree(skip_slot);
	return true;
}
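
To see the compaction strategy in isolation (keep only the last occurrence of each duplicate while preserving queue order), here is a minimal standalone sketch in plain C. The Request struct and the quadratic duplicate search are illustrative stand-ins for the real CheckpointerRequest and hash table, not PostgreSQL code:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for CheckpointerRequest: any type with value equality works. */
typedef struct { int rnode; int segno; } Request;

static bool request_eq(const Request *a, const Request *b)
{
    return a->rnode == b->rnode && a->segno == b->segno;
}

/* Keep only the last occurrence of each request, preserving order. */
static int compact(Request *req, int n)
{
    bool skip[16] = { false };  /* fixed capacity, enough for the demo */
    int  preserved = 0;

    /* mark an entry skippable if a later, identical entry exists */
    for (int i = 0; i < n; i++)
        for (int j = i + 1; j < n; j++)
            if (request_eq(&req[i], &req[j]))
            {
                skip[i] = true;
                break;
            }

    /* slide the survivors down, keeping their relative order */
    for (int i = 0; i < n; i++)
        if (!skip[i])
            req[preserved++] = req[i];
    return preserved;
}

int main(void)
{
    Request q[] = { {1, 0}, {2, 0}, {1, 0}, {3, 1}, {2, 0} };
    int     n = compact(q, 5);

    for (int i = 0; i < n; i++)
        printf("(%d,%d) ", q[i].rnode, q[i].segno);
    printf("\n");
    return 0;
}

The program prints (1,0) (3,1) (2,0): the earlier occurrences of (1,0) and (2,0) are dropped, which is what the skip_slot pass above achieves in linear time with a hash table.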
Example #26
0
/*
 * gp_read_error_log
 *
 * Returns set of error log tuples.
 */
Datum
gp_read_error_log(PG_FUNCTION_ARGS)
{
	FuncCallContext	   *funcctx;
	ReadErrorLogContext *context;
	HeapTuple			tuple;
	Datum				result;

	/*
	 * First call setup
	 */
	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext	oldcontext;
		FILE	   *fp;
		text	   *relname;

		funcctx = SRF_FIRSTCALL_INIT();

		relname = PG_GETARG_TEXT_P(0);
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		context = palloc0(sizeof(ReadErrorLogContext));
		funcctx->user_fctx = (void *) context;

		funcctx->tuple_desc = BlessTupleDesc(GetErrorTupleDesc());

		/*
		 * Though this function usually executes on a segment, if it happens
		 * to run on the QD we dispatch the execution to the segments and
		 * combine the results into one set.
		 */
		if (Gp_role == GP_ROLE_DISPATCH)
		{
			int		resultCount = 0;
			PGresult **results = NULL;
			StringInfoData sql;
			StringInfoData errbuf;
			int		i;

			initStringInfo(&sql);
			initStringInfo(&errbuf);

			/*
			 * construct SQL
			 */
			appendStringInfo(&sql,
					"SELECT * FROM pg_catalog.gp_read_error_log(%s) ",
							 quote_literal_internal(text_to_cstring(relname)));

			results = cdbdisp_dispatchRMCommand(sql.data, true, &errbuf,
												&resultCount);

			if (errbuf.len > 0)
				elog(ERROR, "%s", errbuf.data);
			Assert(resultCount > 0);

			for (i = 0; i < resultCount; i++)
			{
				if (PQresultStatus(results[i]) != PGRES_TUPLES_OK)
					elog(ERROR, "unexpected result from segment: %d",
								PQresultStatus(results[i]));
				context->numTuples += PQntuples(results[i]);
			}

			pfree(errbuf.data);
			pfree(sql.data);

			context->segResults = results;
			context->numSegResults = resultCount;
		}
		else
		{
			/*
			 * In QE, read the error log.
			 */
			RangeVar	   *relrv;
			Oid				relid;

			relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
			relid = RangeVarGetRelid(relrv, true);

			/*
			 * If the relation is gone, silently return no tuples.
			 */
			if (OidIsValid(relid))
			{
				AclResult aclresult;

				/*
				 * Requires SELECT priv to read error log.
				 */
				aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, ACL_KIND_CLASS, relrv->relname);

				ErrorLogFileName(context->filename, MyDatabaseId, relid);
				fp = AllocateFile(context->filename, "r");
				context->fp = fp;
			}
		}

		MemoryContextSwitchTo(oldcontext);

		if (Gp_role != GP_ROLE_DISPATCH && !context->fp)
		{
			pfree(context);
			SRF_RETURN_DONE(funcctx);
		}
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (ReadErrorLogContext *) funcctx->user_fctx;

	/*
	 * Read the error log, typically on segments.  We don't check Gp_role,
	 * however, since the master may also want to read the file.
	 */
	if (context->fp)
	{
		pg_crc32	crc,
					written_crc;

		tuple = ErrorLogRead(context->fp, &written_crc);

		/*
		 * CRC check.
		 */
		if (HeapTupleIsValid(tuple))
		{
			INIT_CRC32C(crc);
			COMP_CRC32C(crc, tuple->t_data, tuple->t_len);
			FIN_CRC32C(crc);

			if (!EQ_CRC32C(crc, written_crc))
			{
				elog(LOG, "incorrect checksum in error log %s",
						  context->filename);
				tuple = NULL;
			}
		}

		/*
		 * If we found a valid tuple, return it.  Otherwise, fall through
		 * to the DONE routine below.
		 */
		if (HeapTupleIsValid(tuple))
		{
			/*
			 * We need to set the typmod so the executor understands
			 * the record type we just blessed.
			 */
			HeapTupleHeaderSetTypMod(tuple->t_data,
									 funcctx->tuple_desc->tdtypmod);

			result = HeapTupleGetDatum(tuple);
			SRF_RETURN_NEXT(funcctx, result);
		}
	}

	/*
	 * If we got results from dispatch, return all the tuples.
	 */
	while (context->currentResult < context->numSegResults)
	{
		Datum		values[NUM_ERRORTABLE_ATTR];
		bool		isnull[NUM_ERRORTABLE_ATTR];
		PGresult   *segres = context->segResults[context->currentResult];
		int			row = context->currentRow;

		if (row >= PQntuples(segres))
		{
			context->currentRow = 0;
			context->currentResult++;
			continue;
		}
		context->currentRow++;

		MemSet(isnull, false, sizeof(isnull));

		values[0] = ResultToDatum(segres, row, 0, timestamptz_in, &isnull[0]);
		values[1] = ResultToDatum(segres, row, 1, textin, &isnull[1]);
		values[2] = ResultToDatum(segres, row, 2, textin, &isnull[2]);
		values[3] = ResultToDatum(segres, row, 3, int4in, &isnull[3]);
		values[4] = ResultToDatum(segres, row, 4, int4in, &isnull[4]);
		values[5] = ResultToDatum(segres, row, 5, textin, &isnull[5]);
		values[6] = ResultToDatum(segres, row, 6, textin, &isnull[6]);
		values[7] = ResultToDatum(segres, row, 7, byteain, &isnull[7]);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, isnull);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}

	if (context->segResults != NULL)
	{
		int		i;

		for (i = 0; i < context->numSegResults; i++)
			PQclear(context->segResults[i]);

		/* XXX: better to copy to palloc'ed area */
		free(context->segResults);
	}

	/*
	 * Close the file, if we have opened it.
	 */
	if (context->fp != NULL)
	{
		FreeFile(context->fp);
		context->fp = NULL;
	}

	SRF_RETURN_DONE(funcctx);
}
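
gp_read_error_log follows PostgreSQL's standard set-returning-function (SRF) protocol: one-time setup under SRF_IS_FIRSTCALL in the multi-call memory context, then one SRF_RETURN_NEXT per row, and finally SRF_RETURN_DONE. Below is a minimal sketch of the bare protocol with the error-log specifics stripped out; count_to_n is a hypothetical extension function, not part of Greenplum:

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(count_to_n);

/* Returns the integers 1..n, one per call, via the SRF protocol above. */
Datum
count_to_n(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;

	if (SRF_IS_FIRSTCALL())
	{
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * Cross-call state belongs in funcctx->multi_call_memory_ctx;
		 * here the built-in max_calls counter is all we need.
		 */
		funcctx->max_calls = PG_GETARG_INT32(0);
	}

	funcctx = SRF_PERCALL_SETUP();

	if (funcctx->call_cntr < funcctx->max_calls)
		SRF_RETURN_NEXT(funcctx, Int32GetDatum((int32) funcctx->call_cntr + 1));

	SRF_RETURN_DONE(funcctx);
}

The FuncCallContext carries call_cntr and max_calls across calls, just as user_fctx carries the ReadErrorLogContext in the function above.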
Example #27
0
Datum
ltree_in(PG_FUNCTION_ARGS)
{
	char	   *buf = (char *) PG_GETARG_POINTER(0);
	char	   *ptr;
	nodeitem   *list,
			   *lptr;
	int			num = 0,
				totallen = 0;
	int			state = LTPRS_WAITNAME;
	ltree	   *result;
	ltree_level *curlevel;
	int			charlen;
	int			pos = 0;

	ptr = buf;
	while (*ptr)
	{
		charlen = pg_mblen(ptr);
		if (charlen == 1 && t_iseq(ptr, '.'))
			num++;
		ptr += charlen;
	}

	if (num + 1 > MaxAllocSize / sizeof(nodeitem))
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("number of levels (%d) exceeds the maximum allowed (%d)",
						num + 1, (int) (MaxAllocSize / sizeof(nodeitem)))));
	list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
	ptr = buf;
	while (*ptr)
	{
		charlen = pg_mblen(ptr);

		if (state == LTPRS_WAITNAME)
		{
			if (ISALNUM(ptr))
			{
				lptr->start = ptr;
				lptr->wlen = 0;
				state = LTPRS_WAITDELIM;
			}
			else
				UNCHAR;
		}
		else if (state == LTPRS_WAITDELIM)
		{
			if (charlen == 1 && t_iseq(ptr, '.'))
			{
				lptr->len = ptr - lptr->start;
				if (lptr->wlen > 255)
					ereport(ERROR,
							(errcode(ERRCODE_NAME_TOO_LONG),
							 errmsg("name of level is too long"),
							 errdetail("Name length is %d, must "
									   "be < 256, in position %d.",
									   lptr->wlen, pos)));

				totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
				lptr++;
				state = LTPRS_WAITNAME;
			}
			else if (!ISALNUM(ptr))
				UNCHAR;
		}
		else
			/* internal error */
			elog(ERROR, "internal error in parser");

		ptr += charlen;
		lptr->wlen++;
		pos++;
	}

	if (state == LTPRS_WAITDELIM)
	{
		lptr->len = ptr - lptr->start;
		if (lptr->wlen > 255)
			ereport(ERROR,
					(errcode(ERRCODE_NAME_TOO_LONG),
					 errmsg("name of level is too long"),
					 errdetail("Name length is %d, must "
							   "be < 256, in position %d.",
							   lptr->wlen, pos)));

		totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
		lptr++;
	}
	else if (!(state == LTPRS_WAITNAME && lptr == list))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("syntax error"),
				 errdetail("Unexpected end of line.")));

	result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
	SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
	result->numlevel = lptr - list;
	curlevel = LTREE_FIRST(result);
	lptr = list;
	while (lptr - list < result->numlevel)
	{
		curlevel->len = (uint16) lptr->len;
		memcpy(curlevel->name, lptr->start, lptr->len);
		curlevel = LEVEL_NEXT(curlevel);
		lptr++;
	}

	pfree(list);
	PG_RETURN_POINTER(result);
}
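
The parser above is a two-state automaton: LTPRS_WAITNAME expects the first character of a label, and LTPRS_WAITDELIM scans ahead to the next dot. The same shape, reduced to a standalone ASCII-only sketch (the real parser is multibyte-aware via pg_mblen and also enforces the 255-character level limit, both omitted here):

#include <ctype.h>
#include <stdio.h>

enum { WAITNAME, WAITDELIM };

int main(void)
{
    const char *buf = "Top.Science.Astronomy";
    const char *start = NULL;
    int         state = WAITNAME;

    for (const char *p = buf;; p++)
    {
        if (state == WAITNAME)
        {
            if (isalnum((unsigned char) *p))
            {
                start = p;              /* label begins here */
                state = WAITDELIM;
            }
            else if (*p != '\0')
            {
                fprintf(stderr, "syntax error at '%c'\n", *p);
                return 1;
            }
        }
        else                            /* WAITDELIM */
        {
            if (*p == '.' || *p == '\0')
            {
                printf("level: %.*s\n", (int) (p - start), start);
                state = WAITNAME;       /* expect the next label */
            }
            else if (!isalnum((unsigned char) *p))
            {
                fprintf(stderr, "syntax error at '%c'\n", *p);
                return 1;
            }
        }
        if (*p == '\0')
            break;
    }
    return 0;
}

Unlike the real parser, this sketch silently accepts a trailing dot; ltree_in rejects that case with "Unexpected end of line" via the state check after its main loop.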
Example #28
0
/*
 * Collects info about fields of a composite type.
 *
 * Based on TupleDescGetAttInMetadata.
 */
ProxyComposite *
plproxy_composite_info(ProxyFunction *func, TupleDesc tupdesc)
{
	int			i,
				natts = tupdesc->natts;
	ProxyComposite *ret;
	MemoryContext old_ctx;
	Form_pg_attribute a;
	ProxyType  *type;
	const char *name;
	Oid oid = tupdesc->tdtypeid;

	old_ctx = MemoryContextSwitchTo(func->ctx);

	ret = palloc(sizeof(*ret));
	ret->type_list = palloc(sizeof(ProxyType *) * natts);
	ret->name_list = palloc0(sizeof(char *) * natts);
	ret->tupdesc = BlessTupleDesc(tupdesc);
	ret->use_binary = 1;

	ret->alterable = 0;
	if (oid != RECORDOID)
	{
		HeapTuple type_tuple;
		HeapTuple rel_tuple;
		Form_pg_type pg_type;

		type_tuple = SearchSysCache(TYPEOID, ObjectIdGetDatum(oid), 0, 0, 0);
		if (!HeapTupleIsValid(type_tuple))
			elog(ERROR, "cache lookup failed for type %u", oid);
		pg_type = (Form_pg_type) GETSTRUCT(type_tuple);
		rel_tuple = SearchSysCache(RELOID, ObjectIdGetDatum(pg_type->typrelid), 0, 0, 0);
		if (!HeapTupleIsValid(rel_tuple))
			elog(ERROR, "cache lookup failed for type relation %u", pg_type->typrelid);
		plproxy_set_stamp(&ret->stamp, rel_tuple);
		ReleaseSysCache(rel_tuple);
		ReleaseSysCache(type_tuple);
		ret->alterable = 1;

		if (ret->tupdesc->tdtypeid != oid)
			elog(ERROR, "lost oid");
	}

	MemoryContextSwitchTo(old_ctx);

	ret->nfields = 0;
	for (i = 0; i < natts; i++)
	{
		a = TupleDescAttr(tupdesc, i);
		if (a->attisdropped)
		{
			ret->name_list[i] = NULL;
			ret->type_list[i] = NULL;
			continue;
		}
		ret->nfields++;

		name = quote_identifier(NameStr(a->attname));
		ret->name_list[i] = plproxy_func_strdup(func, name);

		type = plproxy_find_type_info(func, a->atttypid, 0);
		ret->type_list[i] = type;

		if (!type->has_recv)
			ret->use_binary = 0;
	}

	return ret;
}
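
The loop at the end is the standard TupleDesc iteration pattern: walk attributes 0..natts-1, fetch each Form_pg_attribute with TupleDescAttr, and skip dropped columns. Isolated into a hypothetical helper (a sketch against the PostgreSQL backend headers, not part of PL/Proxy):

#include "postgres.h"
#include "access/tupdesc.h"

/* Count live (non-dropped) attributes of a row type; the same walk
 * plproxy_composite_info uses to build name_list/type_list. */
static int
count_live_attrs(TupleDesc tupdesc)
{
	int		live = 0;

	for (int i = 0; i < tupdesc->natts; i++)
	{
		Form_pg_attribute a = TupleDescAttr(tupdesc, i);

		if (!a->attisdropped)
			live++;
	}
	return live;
}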
Example #29
0
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	if (Gp_role == GP_ROLE_DISPATCH)
		elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
		FaultInjector_InjectFaultIfSet(
			CompactionBeforeSegmentFileDropPhase,
			DDLNotSpecified,
			"",		/* databaseName */
			"");	/* tableName */
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
		FaultInjector_InjectFaultIfSet(
			CompactionBeforeCleanupPhase,
			DDLNotSpecified,
			"",		/* databaseName */
			"");	/* tableName */
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to the correct value
	 * for an appendonly (AO/CO) table.
	 */
	vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return;
	}

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* heap relation */

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats, vacstmt->extra_oids);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
		lazy_truncate_heap(onerel, vacrelstats);

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	/* Update statistics in pg_class */
	vac_update_relstats(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze, vacrelstats->rel_tuples);
}
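
The truncation decision near the end reduces to simple arithmetic: truncate when the freeable tail is at least REL_TRUNCATE_MINIMUM pages, or at least 1/REL_TRUNCATE_FRACTION of the table. A standalone sketch with the constants as defined in PostgreSQL's vacuumlazy.c (1000 and 16; a fork may tune them, so treat the exact values as an assumption):

#include <stdbool.h>
#include <stdio.h>

#define REL_TRUNCATE_MINIMUM	1000	/* values from vacuumlazy.c */
#define REL_TRUNCATE_FRACTION	16

/* The truncation heuristic from lazy_vacuum_rel above, isolated. */
static bool
should_truncate(unsigned rel_pages, unsigned nonempty_pages)
{
	unsigned	possibly_freeable = rel_pages - nonempty_pages;

	return possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		   possibly_freeable >= rel_pages / REL_TRUNCATE_FRACTION;
}

int main(void)
{
	/* 500 freeable pages of 10000: below both thresholds (1000, 625) */
	printf("%d\n", should_truncate(10000, 9500));	/* 0 */
	/* 1000 freeable pages: meets REL_TRUNCATE_MINIMUM */
	printf("%d\n", should_truncate(10000, 9000));	/* 1 */
	return 0;
}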
Example #30
0
/* ----------------------------------------------------------------
 *		ExecInitGatherMerge
 * ----------------------------------------------------------------
 */
GatherMergeState *
ExecInitGatherMerge(GatherMerge *node, EState *estate, int eflags)
{
	GatherMergeState *gm_state;
	Plan	   *outerNode;
	bool		hasoid;
	TupleDesc	tupDesc;

	/* A Gather Merge node has no innerPlan. */
	Assert(innerPlan(node) == NULL);

	/*
	 * create state structure
	 */
	gm_state = makeNode(GatherMergeState);
	gm_state->ps.plan = (Plan *) node;
	gm_state->ps.state = estate;
	gm_state->ps.ExecProcNode = ExecGatherMerge;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &gm_state->ps);

	/*
	 * initialize child expressions
	 */
	gm_state->ps.qual =
		ExecInitQual(node->plan.qual, &gm_state->ps);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &gm_state->ps);

	/*
	 * now initialize outer plan
	 */
	outerNode = outerPlan(node);
	outerPlanState(gm_state) = ExecInitNode(outerNode, estate, eflags);

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&gm_state->ps);
	ExecAssignProjectionInfo(&gm_state->ps, NULL);

	gm_state->gm_initialized = false;

	/*
	 * initialize sort-key information
	 */
	if (node->numCols)
	{
		int			i;

		gm_state->gm_nkeys = node->numCols;
		gm_state->gm_sortkeys =
			palloc0(sizeof(SortSupportData) * node->numCols);

		for (i = 0; i < node->numCols; i++)
		{
			SortSupport sortKey = gm_state->gm_sortkeys + i;

			sortKey->ssup_cxt = CurrentMemoryContext;
			sortKey->ssup_collation = node->collations[i];
			sortKey->ssup_nulls_first = node->nullsFirst[i];
			sortKey->ssup_attno = node->sortColIdx[i];

			/*
			 * We don't perform abbreviated key conversion here, for the same
			 * reasons that it isn't used in MergeAppend
			 */
			sortKey->abbreviate = false;

			PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey);
		}
	}

	/*
	 * store the tuple descriptor into gather merge state, so we can use it
	 * later while initializing the gather merge slots.
	 */
	if (!ExecContextForcesOids(&gm_state->ps, &hasoid))
		hasoid = false;
	tupDesc = ExecTypeFromTL(outerNode->targetlist, hasoid);
	gm_state->tupDesc = tupDesc;

	return gm_state;
}
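
The sort keys initialized above are consumed during the merge by comparing tuples key by key until one key differs. A sketch of that comparison loop over already-extracted column values (compare_by_keys is a hypothetical helper; the real code in nodeGatherMerge.c pulls the values out of tuple slots first):

#include "postgres.h"
#include "utils/sortsupport.h"

static int
compare_by_keys(SortSupport keys, int nkeys,
				Datum *va, bool *na, Datum *vb, bool *nb)
{
	for (int i = 0; i < nkeys; i++)
	{
		int		cmp = ApplySortComparator(va[i], na[i], vb[i], nb[i],
										  &keys[i]);

		if (cmp != 0)
			return cmp;			/* first unequal key decides the order */
	}
	return 0;					/* equal on every sort key */
}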