Exemplo n.º 1
0
/*
 * Redo recompression of posting list.  Doing all the changes in-place is not
 * always possible, because it might require more space than we've on the page.
 * Instead, once modification is required we copy unprocessed tail of the page
 * into separately allocated chunk of memory for further reading original
 * versions of segments.  Thanks to that we don't bother about moving page data
 * in-place.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
	int			actionno;
	int			segno;
	GinPostingList *oldseg;
	Pointer		segmentend;
	char	   *walbuf;
	int			totalsize;
	Pointer		tailCopy = NULL;
	Pointer		writePtr;
	Pointer		segptr;

	/*
	 * If the page is in pre-9.4 format, convert to new format first.
	 */
	if (!GinPageIsCompressed(page))
	{
		ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
		int			nuncompressed = GinPageGetOpaque(page)->maxoff;
		int			npacked;

		/*
		 * Empty leaf pages are deleted as part of vacuum, but leftmost and
		 * rightmost pages are never deleted.  So, pg_upgrade'd from pre-9.4
		 * instances might contain empty leaf pages, and we need to handle
		 * them correctly.
		 */
		if (nuncompressed > 0)
		{
			GinPostingList *plist;

			plist = ginCompressPostingList(uncompressed, nuncompressed,
										   BLCKSZ, &npacked);
			totalsize = SizeOfGinPostingList(plist);

			Assert(npacked == nuncompressed);

			memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
		}
		else
		{
			totalsize = 0;
		}

		GinDataPageSetDataSize(page, totalsize);
		GinPageSetCompressed(page);
		GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
	}

	oldseg = GinDataLeafPageGetPostingList(page);
	writePtr = (Pointer) oldseg;
	segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
	segno = 0;

	walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
	for (actionno = 0; actionno < data->nactions; actionno++)
	{
		uint8		a_segno = *((uint8 *) (walbuf++));
		uint8		a_action = *((uint8 *) (walbuf++));
		GinPostingList *newseg = NULL;
		int			newsegsize = 0;
		ItemPointerData *items = NULL;
		uint16		nitems = 0;
		ItemPointerData *olditems;
		int			nolditems;
		ItemPointerData *newitems;
		int			nnewitems;
		int			segsize;

		/* Extract all the information we need from the WAL record */
		if (a_action == GIN_SEGMENT_INSERT ||
			a_action == GIN_SEGMENT_REPLACE)
		{
			newseg = (GinPostingList *) walbuf;
			newsegsize = SizeOfGinPostingList(newseg);
			walbuf += SHORTALIGN(newsegsize);
		}

		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			memcpy(&nitems, walbuf, sizeof(uint16));
			walbuf += sizeof(uint16);
			items = (ItemPointerData *) walbuf;
			walbuf += nitems * sizeof(ItemPointerData);
		}

		/* Skip to the segment that this action concerns */
		Assert(segno <= a_segno);
		while (segno < a_segno)
		{
			/*
			 * Once modification is started and page tail is copied, we've
			 * to copy unmodified segments.
			 */
			segsize = SizeOfGinPostingList(oldseg);
			if (tailCopy)
			{
				Assert(writePtr + segsize < PageGetSpecialPointer(page));
				memcpy(writePtr, (Pointer) oldseg, segsize);
			}
			writePtr += segsize;
			oldseg = GinNextPostingListSegment(oldseg);
			segno++;
		}

		/*
		 * ADDITEMS action is handled like REPLACE, but the new segment to
		 * replace the old one is reconstructed using the old segment from
		 * disk and the new items from the WAL record.
		 */
		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			int			npacked;

			olditems = ginPostingListDecode(oldseg, &nolditems);

			newitems = ginMergeItemPointers(items, nitems,
											olditems, nolditems,
											&nnewitems);
			Assert(nnewitems == nolditems + nitems);

			newseg = ginCompressPostingList(newitems, nnewitems,
											BLCKSZ, &npacked);
			Assert(npacked == nnewitems);

			newsegsize = SizeOfGinPostingList(newseg);
			a_action = GIN_SEGMENT_REPLACE;
		}

		segptr = (Pointer) oldseg;
		if (segptr != segmentend)
			segsize = SizeOfGinPostingList(oldseg);
		else
		{
			/*
			 * Positioned after the last existing segment. Only INSERTs
			 * expected here.
			 */
			Assert(a_action == GIN_SEGMENT_INSERT);
			segsize = 0;
		}

		/*
		 * We're about to start modification of the page.  So, copy tail of the
		 * page if it's not done already.
		 */
		if (!tailCopy && segptr != segmentend)
		{
			int tailSize = segmentend - segptr;

			tailCopy = (Pointer) palloc(tailSize);
			memcpy(tailCopy, segptr, tailSize);
			segptr = tailCopy;
			oldseg = (GinPostingList *) segptr;
			segmentend = segptr + tailSize;
		}

		switch (a_action)
		{
			case GIN_SEGMENT_DELETE:
				segptr += segsize;
				segno++;
				break;

			case GIN_SEGMENT_INSERT:
				/* copy the new segment in place */
				Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
				memcpy(writePtr, newseg, newsegsize);
				writePtr += newsegsize;
				break;

			case GIN_SEGMENT_REPLACE:
				/* copy the new version of segment in place */
				Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
				memcpy(writePtr, newseg, newsegsize);
				writePtr += newsegsize;
				segptr += segsize;
				segno++;
				break;

			default:
				elog(ERROR, "unexpected GIN leaf action: %u", a_action);
		}
		oldseg = (GinPostingList *) segptr;
	}

	/* Copy the rest of unmodified segments if any. */
	segptr = (Pointer) oldseg;
	if (segptr != segmentend && tailCopy)
	{
		int restSize = segmentend - segptr;

		Assert(writePtr + restSize <= PageGetSpecialPointer(page));
		memcpy(writePtr, segptr, restSize);
		writePtr += restSize;
	}

	totalsize = writePtr - (Pointer) GinDataLeafPageGetPostingList(page);
	GinDataPageSetDataSize(page, totalsize);
}
Exemplo n.º 2
0
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
	int			actionno;
	int			segno;
	GinPostingList *oldseg;
	Pointer		segmentend;
	char	   *walbuf;
	int			totalsize;

	/*
	 * If the page is in pre-9.4 format, convert to new___ format first.
	 */
	if (!GinPageIsCompressed(page))
	{
		ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
		int			nuncompressed = GinPageGetOpaque(page)->maxoff;
		int			npacked;
		GinPostingList *plist;

		plist = ginCompressPostingList(uncompressed, nuncompressed,
									   BLCKSZ, &npacked);
		Assert(npacked == nuncompressed);

		totalsize = SizeOfGinPostingList(plist);

		memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
		GinDataPageSetDataSize(page, totalsize);
		GinPageSetCompressed(page);
		GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
	}

	oldseg = GinDataLeafPageGetPostingList(page);
	segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
	segno = 0;

	walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
	for (actionno = 0; actionno < data->nactions; actionno++)
	{
		uint8		a_segno = *((uint8 *) (walbuf++));
		uint8		a_action = *((uint8 *) (walbuf++));
		GinPostingList *newseg = NULL;
		int			newsegsize = 0;
		ItemPointerData *items = NULL;
		uint16		nitems = 0;
		ItemPointerData *olditems;
		int			nolditems;
		ItemPointerData *newitems;
		int			nnewitems;
		int			segsize;
		Pointer		segptr;
		int			szleft;

		/* Extract all the information we need from the WAL record */
		if (a_action == GIN_SEGMENT_INSERT ||
			a_action == GIN_SEGMENT_REPLACE)
		{
			newseg = (GinPostingList *) walbuf;
			newsegsize = SizeOfGinPostingList(newseg);
			walbuf += SHORTALIGN(newsegsize);
		}

		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			memcpy(&nitems, walbuf, sizeof(uint16));
			walbuf += sizeof(uint16);
			items = (ItemPointerData *) walbuf;
			walbuf += nitems * sizeof(ItemPointerData);
		}

		/* Skip to the segment that this action concerns */
		Assert(segno <= a_segno);
		while (segno < a_segno)
		{
			oldseg = GinNextPostingListSegment(oldseg);
			segno++;
		}

		/*
		 * ADDITEMS action is handled like REPLACE, but the new___ segment to
		 * replace the old one is reconstructed using the old segment from
		 * disk and the new___ items from the WAL record.
		 */
		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			int			npacked;

			olditems = ginPostingListDecode(oldseg, &nolditems);

			newitems = ginMergeItemPointers(items, nitems,
											olditems, nolditems,
											&nnewitems);
			Assert(nnewitems == nolditems + nitems);

			newseg = ginCompressPostingList(newitems, nnewitems,
											BLCKSZ, &npacked);
			Assert(npacked == nnewitems);

			newsegsize = SizeOfGinPostingList(newseg);
			a_action = GIN_SEGMENT_REPLACE;
		}

		segptr = (Pointer) oldseg;
		if (segptr != segmentend)
			segsize = SizeOfGinPostingList(oldseg);
		else
		{
			/*
			 * Positioned after the last existing segment. Only INSERTs
			 * expected here.
			 */
			Assert(a_action == GIN_SEGMENT_INSERT);
			segsize = 0;
		}
		szleft = segmentend - segptr;

		switch (a_action)
		{
			case GIN_SEGMENT_DELETE:
				memmove(segptr, segptr + segsize, szleft - segsize);
				segmentend -= segsize;

				segno++;
				break;

			case GIN_SEGMENT_INSERT:
				/* make room for the new___ segment */
				memmove(segptr + newsegsize, segptr, szleft);
				/* copy the new___ segment in place */
				memcpy(segptr, newseg, newsegsize);
				segmentend += newsegsize;
				segptr += newsegsize;
				break;

			case GIN_SEGMENT_REPLACE:
				/* shift the segments that follow */
				memmove(segptr + newsegsize,
						segptr + segsize,
						szleft - segsize);
				/* copy the replacement segment in place */
				memcpy(segptr, newseg, newsegsize);
				segmentend -= segsize;
				segmentend += newsegsize;
				segptr += newsegsize;
				segno++;
				break;

			default:
				elog(ERROR, "unexpected GIN leaf action: %u", a_action);
		}
		oldseg = (GinPostingList *) segptr;
	}

	totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
	GinDataPageSetDataSize(page, totalsize);
}
Exemplo n.º 3
0
Datum
gin_leafpage_items(PG_FUNCTION_ARGS)
{
	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
	int			raw_page_size;
	FuncCallContext *fctx;
	gin_leafpage_items_state *inter_call_data;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use raw page functions"))));

	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext mctx;
		Page		page;
		GinPageOpaque opaq;

		if (raw_page_size < BLCKSZ)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				  errmsg("input page too small (%d bytes)", raw_page_size)));
		page = VARDATA(raw_page);

		if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("input page is not a valid GIN data leaf page"),
					 errdetail("Special size %d, expected %d",
							   (int) PageGetSpecialSize(page),
							   (int) MAXALIGN(sizeof(GinPageOpaqueData)))));

		opaq = (GinPageOpaque) PageGetSpecialPointer(page);
		if (opaq->flags != (GIN_DATA | GIN_LEAF | GIN_COMPRESSED))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("input page is not a compressed GIN data leaf page"),
					 errdetail("Flags %04X, expected %04X",
							   opaq->flags,
							   (GIN_DATA | GIN_LEAF | GIN_COMPRESSED))));

		fctx = SRF_FIRSTCALL_INIT();
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		inter_call_data = palloc(sizeof(gin_leafpage_items_state));

		/* Build a tuple descriptor for our result type */
		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		inter_call_data->tupd = tupdesc;

		inter_call_data->seg = GinDataLeafPageGetPostingList(page);
		inter_call_data->lastseg = (GinPostingList *)
			(((char *) inter_call_data->seg) +
			 GinDataLeafPageGetPostingListSize(page));

		fctx->user_fctx = inter_call_data;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	inter_call_data = fctx->user_fctx;

	if (inter_call_data->seg != inter_call_data->lastseg)
	{
		GinPostingList *cur = inter_call_data->seg;
		HeapTuple	resultTuple;
		Datum		result;
		Datum		values[3];
		bool		nulls[3];
		int			ndecoded,
					i;
		ItemPointer tids;
		Datum	   *tids_datum;

		memset(nulls, 0, sizeof(nulls));

		values[0] = ItemPointerGetDatum(&cur->first);
		values[1] = UInt16GetDatum(cur->nbytes);

		/* build an array of decoded item pointers */
		tids = ginPostingListDecode(cur, &ndecoded);
		tids_datum = (Datum *) palloc(ndecoded * sizeof(Datum));
		for (i = 0; i < ndecoded; i++)
			tids_datum[i] = ItemPointerGetDatum(&tids[i]);
		values[2] = PointerGetDatum(construct_array(tids_datum,
													ndecoded,
													TIDOID,
													sizeof(ItemPointerData),
													false, 's'));
		pfree(tids_datum);
		pfree(tids);

		/* Build and return the result tuple. */
		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
		result = HeapTupleGetDatum(resultTuple);

		inter_call_data->seg = GinNextPostingListSegment(cur);

		SRF_RETURN_NEXT(fctx, result);
	}
	else
		SRF_RETURN_DONE(fctx);
}