/*
 * Replay a posting-list recompression record against a GIN data leaf page.
 *
 * The record carries a list of per-segment actions (insert, replace, delete,
 * add items), ordered by segment number.  The new page content is assembled
 * front to back through a write cursor.  Rewriting strictly in place is not
 * always possible, since an intermediate state could need more room than the
 * page has.  Therefore, the first time an action lands on an existing
 * segment, the part of the page that has not been consumed yet is duplicated
 * into separately allocated memory, and the original segments are read from
 * that copy from then on.  This way the page data never has to be shuffled
 * around in place.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
	GinPostingList *readSeg;	/* next original segment to consume */
	Pointer		readPos;		/* readSeg viewed as a byte pointer */
	Pointer		readEnd;		/* end of the original segments being read */
	Pointer		dst;			/* where the next output segment goes */
	Pointer		savedTail = NULL;	/* private copy of the page tail */
	char	   *rec;			/* cursor into the WAL action list */
	int			curSegno;
	int			act;
	int			newSize;

	/*
	 * A page still in the pre-9.4 layout is switched to the compressed
	 * layout before any action is applied.
	 */
	if (!GinPageIsCompressed(page))
	{
		ItemPointer rawItems = (ItemPointer) GinDataPageGetData(page);
		int			nRaw = GinPageGetOpaque(page)->maxoff;
		int			npacked;

		/*
		 * Vacuum removes empty leaf pages, except for the leftmost and
		 * rightmost ones.  An instance pg_upgrade'd from before 9.4 can
		 * therefore still contain an empty leaf, which simply becomes an
		 * empty compressed page.
		 */
		newSize = 0;
		if (nRaw > 0)
		{
			GinPostingList *packed;

			packed = ginCompressPostingList(rawItems, nRaw, BLCKSZ,
											&npacked);
			newSize = SizeOfGinPostingList(packed);
			Assert(npacked == nRaw);

			memcpy(GinDataLeafPageGetPostingList(page), packed, newSize);
		}

		GinDataPageSetDataSize(page, newSize);
		GinPageSetCompressed(page);
		GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
	}

	readSeg = GinDataLeafPageGetPostingList(page);
	dst = (Pointer) readSeg;
	readEnd = (Pointer) readSeg + GinDataLeafPageGetPostingListSize(page);
	curSegno = 0;

	rec = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
	for (act = 0; act < data->nactions; act++)
	{
		/* Each action starts with two bytes: segment number, action code */
		const uint8 *hdr = (const uint8 *) rec;
		uint8		targetSegno = hdr[0];
		uint8		action = hdr[1];
		GinPostingList *newseg = NULL;
		int			newsegsize = 0;
		ItemPointerData *addItems = NULL;
		uint16		nAddItems = 0;
		int			oldsegsize;

		rec += 2;

		/* Pull the action's payload, if it has one, out of the record */
		if (action == GIN_SEGMENT_ADDITEMS)
		{
			memcpy(&nAddItems, rec, sizeof(uint16));
			rec += sizeof(uint16);
			addItems = (ItemPointerData *) rec;
			rec += nAddItems * sizeof(ItemPointerData);
		}
		else if (action == GIN_SEGMENT_INSERT ||
				 action == GIN_SEGMENT_REPLACE)
		{
			newseg = (GinPostingList *) rec;
			newsegsize = SizeOfGinPostingList(newseg);
			rec += SHORTALIGN(newsegsize);
		}

		/*
		 * Advance to the segment this action refers to.  Before the first
		 * modification the skipped segments are already where they belong;
		 * once the tail has been copied aside they have to be written back
		 * to the page explicitly.
		 */
		Assert(curSegno <= targetSegno);
		for (; curSegno < targetSegno; curSegno++)
		{
			oldsegsize = SizeOfGinPostingList(readSeg);
			if (savedTail != NULL)
			{
				Assert(dst + oldsegsize < PageGetSpecialPointer(page));
				memcpy(dst, (Pointer) readSeg, oldsegsize);
			}
			dst += oldsegsize;
			readSeg = GinNextPostingListSegment(readSeg);
		}

		/*
		 * ADDITEMS becomes a REPLACE: the replacement segment is built by
		 * merging the items from the WAL record into the items decoded from
		 * the existing segment.
		 */
		if (action == GIN_SEGMENT_ADDITEMS)
		{
			ItemPointerData *oldItems;
			int			nOldItems;
			ItemPointerData *merged;
			int			nMerged;
			int			npacked;

			oldItems = ginPostingListDecode(readSeg, &nOldItems);

			merged = ginMergeItemPointers(addItems, nAddItems,
										  oldItems, nOldItems,
										  &nMerged);
			Assert(nMerged == nOldItems + nAddItems);

			newseg = ginCompressPostingList(merged, nMerged, BLCKSZ,
											&npacked);
			Assert(npacked == nMerged);

			newsegsize = SizeOfGinPostingList(newseg);
			action = GIN_SEGMENT_REPLACE;
		}

		readPos = (Pointer) readSeg;
		if (readPos == readEnd)
		{
			/*
			 * We are past the last existing segment, so there is no old
			 * segment to consume.  Only an INSERT makes sense here.
			 */
			Assert(action == GIN_SEGMENT_INSERT);
			oldsegsize = 0;
		}
		else
			oldsegsize = SizeOfGinPostingList(readSeg);

		/*
		 * The page is about to be modified.  Unless it was done for an
		 * earlier action, save the unread tail and keep reading from the
		 * saved copy.
		 */
		if (savedTail == NULL && readPos != readEnd)
		{
			int			tailSize = readEnd - readPos;

			savedTail = (Pointer) palloc(tailSize);
			memcpy(savedTail, readPos, tailSize);
			readPos = savedTail;
			readSeg = (GinPostingList *) readPos;
			readEnd = readPos + tailSize;
		}

		if (action == GIN_SEGMENT_DELETE)
		{
			/* consume the old segment without emitting anything */
			readPos += oldsegsize;
			curSegno++;
		}
		else if (action == GIN_SEGMENT_INSERT)
		{
			/* emit the new segment; no old segment is consumed */
			Assert(dst + newsegsize <= PageGetSpecialPointer(page));
			memcpy(dst, newseg, newsegsize);
			dst += newsegsize;
		}
		else if (action == GIN_SEGMENT_REPLACE)
		{
			/* emit the new version and consume the old one */
			Assert(dst + newsegsize <= PageGetSpecialPointer(page));
			memcpy(dst, newseg, newsegsize);
			dst += newsegsize;
			readPos += oldsegsize;
			curSegno++;
		}
		else
			elog(ERROR, "unexpected GIN leaf action: %u", action);

		readSeg = (GinPostingList *) readPos;
	}

	/*
	 * Segments behind the last action are untouched.  They only need to be
	 * written back if reading was redirected to the saved tail.
	 */
	readPos = (Pointer) readSeg;
	if (savedTail != NULL && readPos != readEnd)
	{
		int			restSize = readEnd - readPos;

		Assert(dst + restSize <= PageGetSpecialPointer(page));
		memcpy(dst, readPos, restSize);
		dst += restSize;
	}

	newSize = dst - (Pointer) GinDataLeafPageGetPostingList(page);
	GinDataPageSetDataSize(page, newSize);
}
/*
 * Redo recompression of a posting list on a GIN data leaf page.
 *
 * 'data' is followed in memory by data->nactions action entries.  Each entry
 * starts with a segment number byte and an action byte; INSERT and REPLACE
 * are followed by the new segment, ADDITEMS by an item count and the items.
 * The actions are applied in order, editing the page's segments in place.
 *
 * NOTE(review): the memmove() calls below shift the following segments
 * directly on the page and nothing here checks that every intermediate
 * layout fits in front of the page's special area.  This relies on the
 * record never needing more room mid-way than the page provides -- confirm.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
	int			actionno;
	int			segno;
	GinPostingList *oldseg;
	Pointer		segmentend;
	char	   *walbuf;
	int			totalsize;

	/*
	 * If the page is in pre-9.4 format, convert to new format first.
	 */
	if (!GinPageIsCompressed(page))
	{
		ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
		int			nuncompressed = GinPageGetOpaque(page)->maxoff;

		/*
		 * An old-format leaf page may hold no items at all (maxoff == 0).
		 * Do not hand a zero-length array to the compressor in that case;
		 * the page simply becomes an empty compressed leaf.
		 */
		if (nuncompressed > 0)
		{
			int			npacked;
			GinPostingList *plist;

			plist = ginCompressPostingList(uncompressed, nuncompressed,
										   BLCKSZ, &npacked);
			Assert(npacked == nuncompressed);

			totalsize = SizeOfGinPostingList(plist);

			memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
		}
		else
			totalsize = 0;

		GinDataPageSetDataSize(page, totalsize);
		GinPageSetCompressed(page);
		GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
	}

	oldseg = GinDataLeafPageGetPostingList(page);
	segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
	segno = 0;

	walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
	for (actionno = 0; actionno < data->nactions; actionno++)
	{
		uint8		a_segno = *((uint8 *) (walbuf++));
		uint8		a_action = *((uint8 *) (walbuf++));
		GinPostingList *newseg = NULL;
		int			newsegsize = 0;
		ItemPointerData *items = NULL;
		uint16		nitems = 0;
		ItemPointerData *olditems;
		int			nolditems;
		ItemPointerData *newitems;
		int			nnewitems;
		int			segsize;
		Pointer		segptr;
		int			szleft;

		/* Extract all the information we need from the WAL record */
		if (a_action == GIN_SEGMENT_INSERT ||
			a_action == GIN_SEGMENT_REPLACE)
		{
			newseg = (GinPostingList *) walbuf;
			newsegsize = SizeOfGinPostingList(newseg);
			walbuf += SHORTALIGN(newsegsize);
		}

		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			/* the count is copied out with memcpy rather than dereferenced */
			memcpy(&nitems, walbuf, sizeof(uint16));
			walbuf += sizeof(uint16);
			items = (ItemPointerData *) walbuf;
			walbuf += nitems * sizeof(ItemPointerData);
		}

		/* Skip to the segment that this action concerns */
		Assert(segno <= a_segno);
		while (segno < a_segno)
		{
			oldseg = GinNextPostingListSegment(oldseg);
			segno++;
		}

		/*
		 * ADDITEMS action is handled like REPLACE, but the new segment to
		 * replace the old one is reconstructed using the old segment from
		 * disk and the new items from the WAL record.
		 */
		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			int			npacked;

			olditems = ginPostingListDecode(oldseg, &nolditems);

			newitems = ginMergeItemPointers(items, nitems,
											olditems, nolditems,
											&nnewitems);
			Assert(nnewitems == nolditems + nitems);

			newseg = ginCompressPostingList(newitems, nnewitems,
											BLCKSZ, &npacked);
			Assert(npacked == nnewitems);

			newsegsize = SizeOfGinPostingList(newseg);
			a_action = GIN_SEGMENT_REPLACE;
		}

		segptr = (Pointer) oldseg;
		if (segptr != segmentend)
			segsize = SizeOfGinPostingList(oldseg);
		else
		{
			/*
			 * Positioned after the last existing segment. Only INSERTs
			 * expected here.
			 */
			Assert(a_action == GIN_SEGMENT_INSERT);
			segsize = 0;
		}

		/* bytes from the current segment to the end of the posting lists */
		szleft = segmentend - segptr;

		switch (a_action)
		{
			case GIN_SEGMENT_DELETE:
				/* close the gap; segptr now addresses the next segment */
				memmove(segptr, segptr + segsize, szleft - segsize);
				segmentend -= segsize;

				segno++;
				break;

			case GIN_SEGMENT_INSERT:
				/* make room for the new segment */
				memmove(segptr + newsegsize, segptr, szleft);
				/* copy the new segment in place */
				memcpy(segptr, newseg, newsegsize);
				segmentend += newsegsize;
				segptr += newsegsize;
				break;

			case GIN_SEGMENT_REPLACE:
				/* shift the segments that follow */
				memmove(segptr + newsegsize,
						segptr + segsize,
						szleft - segsize);
				/* copy the replacement segment in place */
				memcpy(segptr, newseg, newsegsize);
				segmentend -= segsize;
				segmentend += newsegsize;
				segptr += newsegsize;
				segno++;
				break;

			default:
				elog(ERROR, "unexpected GIN leaf action: %u", a_action);
		}
		oldseg = (GinPostingList *) segptr;
	}

	/* Record the new total size of the posting lists on the page */
	totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
	GinDataPageSetDataSize(page, totalsize);
}
Datum gin_leafpage_items(PG_FUNCTION_ARGS) { bytea *raw_page = PG_GETARG_BYTEA_P(0); int raw_page_size; FuncCallContext *fctx; gin_leafpage_items_state *inter_call_data; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw page functions")))); raw_page_size = VARSIZE(raw_page) - VARHDRSZ; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext mctx; Page page; GinPageOpaque opaq; if (raw_page_size < BLCKSZ) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("input page too small (%d bytes)", raw_page_size))); page = VARDATA(raw_page); if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("input page is not a valid GIN data leaf page"), errdetail("Special size %d, expected %d", (int) PageGetSpecialSize(page), (int) MAXALIGN(sizeof(GinPageOpaqueData))))); opaq = (GinPageOpaque) PageGetSpecialPointer(page); if (opaq->flags != (GIN_DATA | GIN_LEAF | GIN_COMPRESSED)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("input page is not a compressed GIN data leaf page"), errdetail("Flags %04X, expected %04X", opaq->flags, (GIN_DATA | GIN_LEAF | GIN_COMPRESSED)))); fctx = SRF_FIRSTCALL_INIT(); mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); inter_call_data = palloc(sizeof(gin_leafpage_items_state)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); inter_call_data->tupd = tupdesc; inter_call_data->seg = GinDataLeafPageGetPostingList(page); inter_call_data->lastseg = (GinPostingList *) (((char *) inter_call_data->seg) + GinDataLeafPageGetPostingListSize(page)); fctx->user_fctx = inter_call_data; MemoryContextSwitchTo(mctx); } fctx = SRF_PERCALL_SETUP(); inter_call_data = fctx->user_fctx; if (inter_call_data->seg != inter_call_data->lastseg) { GinPostingList *cur = 
inter_call_data->seg; HeapTuple resultTuple; Datum result; Datum values[3]; bool nulls[3]; int ndecoded, i; ItemPointer tids; Datum *tids_datum; memset(nulls, 0, sizeof(nulls)); values[0] = ItemPointerGetDatum(&cur->first); values[1] = UInt16GetDatum(cur->nbytes); /* build an array of decoded item pointers */ tids = ginPostingListDecode(cur, &ndecoded); tids_datum = (Datum *) palloc(ndecoded * sizeof(Datum)); for (i = 0; i < ndecoded; i++) tids_datum[i] = ItemPointerGetDatum(&tids[i]); values[2] = PointerGetDatum(construct_array(tids_datum, ndecoded, TIDOID, sizeof(ItemPointerData), false, 's')); pfree(tids_datum); pfree(tids); /* Build and return the result tuple. */ resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls); result = HeapTupleGetDatum(resultTuple); inter_call_data->seg = GinNextPostingListSegment(cur); SRF_RETURN_NEXT(fctx, result); } else SRF_RETURN_DONE(fctx); }