/*
 * PageGetTempPage
 *		Build a scratch copy of a page in palloc'd memory.
 *
 * The copy keeps the source page's header and trailing special space, but
 * the region between them is zeroed and pd_lower/pd_upper are reset so that
 * the copy looks empty.  specialSize is MAXALIGN'd before use.
 * The caller owns the returned memory.
 */
Page
PageGetTempPage(Page page, Size specialSize)
{
	Size		pageSize = PageGetPageSize(page);
	Size		specialBytes = MAXALIGN(specialSize);
	Size		middleBytes = pageSize - SizeOfPageHeaderData - specialBytes;
	Page		temp = (Page) palloc(pageSize);
	PageHeader	thdr = (PageHeader) temp;

	/* start from a verbatim copy of the source page */
	memcpy(temp, page, pageSize);

	/* wipe everything between the header and the special space */
	MemSet(PageGetContents(thdr), 0, middleBytes);

	/* reset the low/high water marks to describe an empty page */
	thdr->pd_lower = SizeOfPageHeaderData;
	thdr->pd_upper = pageSize - specialBytes;

	return temp;
}
/* -------------------------------------------------
 * GetHashPageStatistics()
 *
 * Fill *stat with figures describing a single hash page: page size,
 * the fields of its opaque (special) area, the number of live and dead
 * line pointers, and the free space reported by PageGetFreeSpace().
 * -------------------------------------------------
 */
static void
GetHashPageStatistics(Page page, HashPageStat * stat)
{
	HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
	int			off;

	stat->live_items = 0;
	stat->dead_items = 0;
	stat->page_size = PageGetPageSize(page);

	/* copy the hash-specific opaque data verbatim */
	stat->hasho_prevblkno = opaque->hasho_prevblkno;
	stat->hasho_nextblkno = opaque->hasho_nextblkno;
	stat->hasho_bucket = opaque->hasho_bucket;
	stat->hasho_flag = opaque->hasho_flag;
	stat->hasho_page_id = opaque->hasho_page_id;

	/* classify every line pointer as dead or live */
	for (off = FirstOffsetNumber; off <= maxoff; off++)
	{
		ItemId		id = PageGetItemId(page, off);

		if (ItemIdIsDead(id))
			stat->dead_items++;
		else
			stat->live_items++;
	}

	stat->free_size = PageGetFreeSpace(page);
}
/*
 * parse_page
 *		Validate a page header and report where its unused "hole" lies.
 *
 * *lsn is always set from the page header, even when the page is rejected.
 * On success, *offset receives pd_lower and *length receives
 * pd_upper - pd_lower, and true is returned.  If any header sanity check
 * fails (or the LSN is invalid), *offset and *length are zeroed and false
 * is returned.
 */
static bool
parse_page(const DataPage *page, XLogRecPtr *lsn, uint16 *offset, uint16 *length)
{
	const PageHeaderData *page_data = &page->page_data;
	bool		header_ok;

	/* Get lsn from page header */
	*lsn = PageXLogRecPtrGet(page_data->pd_lsn);

	header_ok =
		PageGetPageSize(page_data) == BLCKSZ &&
		PageGetPageLayoutVersion(page_data) == PG_PAGE_LAYOUT_VERSION &&
		(page_data->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
		page_data->pd_lower >= SizeOfPageHeaderData &&
		page_data->pd_lower <= page_data->pd_upper &&
		page_data->pd_upper <= page_data->pd_special &&
		page_data->pd_special <= BLCKSZ &&
		page_data->pd_special == MAXALIGN(page_data->pd_special) &&
		!XLogRecPtrIsInvalid(*lsn);

	if (!header_ok)
	{
		*length = 0;
		*offset = 0;
		return false;
	}

	*offset = page_data->pd_lower;
	*length = page_data->pd_upper - page_data->pd_lower;
	return true;
}
/*
 * PageHeaderIsValid
 *		Check that the header fields of a page appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus item
 * pointers, testing invalid transaction identifiers, etc.
 *
 * A page consisting entirely of zero bytes is accepted as well.  Such a
 * page can legitimately exist in a table (for example, a backend extends a
 * relation and then crashes before writing any WAL about the new page; the
 * kernel already has the zeroed page in the file and it stays that way
 * after restart).  The page access macros are expected to treat such a
 * page as empty and without free space until VACUUM makes it usable.
 */
bool
PageHeaderIsValid(PageHeader page)
{
	const char *cursor;
	const char *end;

	/* Normal case: every header field is within its expected range */
	if (PageGetPageSize(page) == BLCKSZ &&
		PageGetPageLayoutVersion(page) == PG_PAGE_LAYOUT_VERSION &&
		(page->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
		page->pd_lower >= SizeOfPageHeaderData &&
		page->pd_lower <= page->pd_upper &&
		page->pd_upper <= page->pd_special &&
		page->pd_special <= BLCKSZ &&
		page->pd_special == MAXALIGN(page->pd_special))
		return true;

	/* Otherwise the page is acceptable only if all BLCKSZ bytes are zero */
	cursor = (const char *) page;
	end = cursor + BLCKSZ;
	while (cursor < end)
	{
		if (*cursor != 0)
			return false;
		cursor++;
	}

	return true;
}
/*
 * PageRestoreTempPage
 *		Overwrite oldPage with the contents of tempPage, then pfree tempPage.
 *
 * The number of bytes copied is the page size recorded in tempPage's header.
 * tempPage must not be used after this call.
 */
void
PageRestoreTempPage(Page tempPage, Page oldPage)
{
	Size		nbytes = PageGetPageSize(tempPage);

	memcpy((char *) oldPage, (char *) tempPage, nbytes);

	pfree(tempPage);
}
/*
 * GistPageGetCopyPage
 *		Return a palloc'd, byte-for-byte copy of the given page.  The copy's
 *		length is the page size recorded in the source page's header.
 */
static Page
GistPageGetCopyPage(Page page)
{
	Size		nbytes = PageGetPageSize(page);
	Page		copy = (Page) palloc(nbytes);

	memcpy(copy, page, nbytes);

	return copy;
}
/*
 * PageGetTempPage
 *		Allocate a scratch page in local memory, sized like the given page.
 *
 * Only the size is taken from the source page; the returned memory is not
 * initialized at all, so the caller must do that.
 */
Page
PageGetTempPage(Page page)
{
	Size		nbytes = PageGetPageSize(page);

	return (Page) palloc(nbytes);
}
/*
 * PageGetTempPageCopy
 *		Allocate a scratch page in local memory and fill it with an exact
 *		copy of the given page (header, contents and special space alike).
 */
Page
PageGetTempPageCopy(Page page)
{
	Size		nbytes = PageGetPageSize(page);
	Page		copy = (Page) palloc(nbytes);

	memcpy(copy, page, nbytes);

	return copy;
}
/*
 * PageGetTempPageCopySpecial
 *		Allocate a scratch page in local memory that is empty except for
 *		its special space.
 *
 * The new page is PageInit'd with the same page size and special-space size
 * as the given page, after which the special space bytes are copied over
 * from the given page.
 */
Page
PageGetTempPageCopySpecial(Page page)
{
	Size		pageSize = PageGetPageSize(page);
	Size		specialSize = PageGetSpecialSize(page);
	Page		temp = (Page) palloc(pageSize);

	PageInit(temp, pageSize, specialSize);

	/* carry the special space across verbatim */
	memcpy(PageGetSpecialPointer(temp),
		   PageGetSpecialPointer(page),
		   specialSize);

	return temp;
}
/*
 * _hash_checkpage -- sanity checks on the format of all hash pages
 *
 * When flags is exactly LH_META_PAGE the page is treated as the metapage
 * and its magic number and version are verified; a mismatch raises ERROR.
 * The remaining checks are assertions and are compiled only when
 * USE_ASSERT_CHECKING is defined.
 */
void
_hash_checkpage(Relation rel, Page page, int flags)
{
	Assert(page);

	/*
	 * The metapage gets real (non-assert) checks of magic number and version.
	 */
	if (flags == LH_META_PAGE)
	{
		HashMetaPage meta = (HashMetaPage) page;

		if (meta->hashm_magic != HASH_MAGIC)
			ereport(ERROR,
					(errcode(ERRCODE_INDEX_CORRUPTED),
					 errmsg("index \"%s\" is not a hash index",
							RelationGetRelationName(rel))));

		if (meta->hashm_version != HASH_VERSION)
			ereport(ERROR,
					(errcode(ERRCODE_INDEX_CORRUPTED),
					 errmsg("index \"%s\" has wrong hash version",
							RelationGetRelationName(rel)),
					 errhint("Please REINDEX it.")));
	}

	/*
	 * Debug-only layout checks: header bounds, special-space position and
	 * page size, plus (when flags is nonzero) that the page's hasho_flag
	 * shares at least one bit with flags.
	 */
#ifdef USE_ASSERT_CHECKING
	{
		PageHeader	phdr = (PageHeader) (page);

		Assert(phdr->pd_lower >= SizeOfPageHeaderData);
		Assert(phdr->pd_upper <= (BLCKSZ - MAXALIGN(sizeof(HashPageOpaqueData))));
		Assert(phdr->pd_special == (BLCKSZ - MAXALIGN(sizeof(HashPageOpaqueData))));
		Assert(PageGetPageSize(page) == BLCKSZ);

		if (flags)
		{
			HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);

			Assert(opaque->hasho_flag & flags);
		}
	}
#endif   /* USE_ASSERT_CHECKING */
}
Datum page_header(PG_FUNCTION_ARGS) { bytea *raw_page = PG_GETARG_BYTEA_P(0); int raw_page_size; TupleDesc tupdesc; Datum result; HeapTuple tuple; Datum values[9]; bool nulls[9]; PageHeader page; XLogRecPtr lsn; char lsnchar[64]; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw page functions")))); raw_page_size = VARSIZE(raw_page) - VARHDRSZ; /* * Check that enough data was supplied, so that we don't try to access * fields outside the supplied buffer. */ if (raw_page_size < sizeof(PageHeaderData)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("input page too small (%d bytes)", raw_page_size))); page = (PageHeader) VARDATA(raw_page); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* Extract information from the page header */ lsn = PageGetLSN(page); snprintf(lsnchar, sizeof(lsnchar), "%X/%X", lsn.xlogid, lsn.xrecoff); values[0] = CStringGetTextDatum(lsnchar); values[1] = UInt16GetDatum(PageGetTLI(page)); values[2] = UInt16GetDatum(page->pd_flags); values[3] = UInt16GetDatum(page->pd_lower); values[4] = UInt16GetDatum(page->pd_upper); values[5] = UInt16GetDatum(page->pd_special); values[6] = UInt16GetDatum(PageGetPageSize(page)); values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); values[8] = TransactionIdGetDatum(page->pd_prune_xid); /* Build and return the tuple. */ memset(nulls, 0, sizeof(nulls)); tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); }
/*
 * Split a posting-tree (data) page and fill in the WAL record for the split.
 *
 * The page in lbuf itself is left untouched: the function works on a
 * palloc'd copy of it and returns that copy as the new left page.  The page
 * in rbuf is initialized in place and receives the right half.
 *
 * Parameters, as used by the code below:
 *	btree		- only btree->isBuild is consulted here
 *	lbuf, rbuf	- buffers holding the page being split and the new right page
 *	off			- offset at which the new item is to be inserted
 *	insertdata	- on a leaf page, a GinBtreeDataLeafInsertData (its curitem
 *				  is advanced for every item consumed); on an internal page,
 *				  a PostingItem
 *	updateblkno - on an internal page, the block number stored into the
 *				  existing posting item at 'off' before the split
 *	prdata		- output: set to a static XLogRecData chain describing the
 *				  split
 *
 * On a rightmost leaf page with the insert position past the last item, as
 * many pending items as fit under the loop's size bound are appended, not
 * just one.  In build mode on a rightmost page the split point is chosen so
 * that the left page is filled, rather than splitting in the middle.
 *
 * The WAL data, the XLogRecData chain and the item vector are static, so
 * the result is valid only until the next call.
 */
static Page
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
			  void *insertdata, BlockNumber updateblkno, XLogRecData **prdata)
{
	char	   *ptr;
	OffsetNumber separator;
	ItemPointer bound;
	Page		lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
	bool		isleaf = GinPageIsLeaf(lpage);
	ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
	int			sizeofitem = GinSizeOfDataPageItem(lpage);
	OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
	Page		rpage = BufferGetPage(rbuf);
	Size		pageSize = PageGetPageSize(lpage);
	Size		freeSpace;

	/* these must be static so they can be returned to caller */
	static ginxlogSplitData data;
	static XLogRecData rdata[2];
	static char vector[2 * BLCKSZ];

	/*
	 * Initialize the right page once up front so that the free space of an
	 * empty data page can be measured; it is initialized again further down.
	 */
	GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
	freeSpace = GinDataPageGetFreeSpace(rpage);

	*prdata = rdata;

	/* Update existing downlink to point to next page (on internal page) */
	if (!isleaf)
	{
		PostingItem *pitem = GinDataPageGetPostingItem(lpage, off);

		PostingItemSetBlockNumber(pitem, updateblkno);
	}

	/* Copy all existing items of the left page into the work vector */
	if (isleaf)
	{
		memcpy(vector,
			   GinDataPageGetItemPointer(lpage, FirstOffsetNumber),
			   maxoff * sizeof(ItemPointerData));
	}
	else
	{
		memcpy(vector,
			   GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
			   maxoff * sizeof(PostingItem));
	}

	if (isleaf && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
	{
		/* append new items to the end */
		GinBtreeDataLeafInsertData *items = insertdata;

		/*
		 * Keep consuming pending items while any remain and the vector stays
		 * below twice the per-page free space less one item.
		 */
		while (items->curitem < items->nitem &&
			   maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
		{
			memcpy(vector + maxoff * sizeof(ItemPointerData),
				   items->items + items->curitem,
				   sizeof(ItemPointerData));
			maxoff++;
			items->curitem++;
		}
	}
	else
	{
		/* Insert exactly one item at position 'off', shifting the tail up */
		ptr = vector + (off - 1) * sizeofitem;
		if (maxoff + 1 - off != 0)
			memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
		if (isleaf)
		{
			GinBtreeDataLeafInsertData *items = insertdata;

			memcpy(ptr, items->items + items->curitem, sizeofitem);
			items->curitem++;
		}
		else
		{
			PostingItem *pitem = insertdata;

			memcpy(ptr, pitem, sizeofitem);
		}

		maxoff++;
	}

	/*
	 * we assume that during index creation the table scanned from beginning
	 * to end, so ItemPointers are in monotonically increasing order.
	 * Hence in build mode on the rightmost page the left page is filled
	 * completely; otherwise the items are split evenly by count.
	 */
	if (btree->isBuild && GinPageRightMost(lpage))
		separator = freeSpace / sizeofitem;
	else
		separator = maxoff / 2;

	/*
	 * Re-initialize both pages.  rpage is done first, from lpage's flags, so
	 * that lpage can then be initialized from the flags now stored in rpage.
	 */
	GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
	GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);

	/* First 'separator' items go to the left page ... */
	if (isleaf)
		memcpy(GinDataPageGetItemPointer(lpage, FirstOffsetNumber),
			   vector, separator * sizeof(ItemPointerData));
	else
		memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
			   vector, separator * sizeof(PostingItem));

	GinPageGetOpaque(lpage)->maxoff = separator;

	/* ... and the remaining ones to the right page */
	if (isleaf)
		memcpy(GinDataPageGetItemPointer(rpage, FirstOffsetNumber),
			   vector + separator * sizeof(ItemPointerData),
			   (maxoff - separator) * sizeof(ItemPointerData));
	else
		memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
			   vector + separator * sizeof(PostingItem),
			   (maxoff - separator) * sizeof(PostingItem));

	GinPageGetOpaque(rpage)->maxoff = maxoff - separator;

	/* set up right bound for left page: the key of its last item */
	bound = GinDataPageGetRightBound(lpage);
	if (GinPageIsLeaf(lpage))
		*bound = *GinDataPageGetItemPointer(lpage,
											GinPageGetOpaque(lpage)->maxoff);
	else
		*bound = GinDataPageGetPostingItem(lpage,
										   GinPageGetOpaque(lpage)->maxoff)->key;

	/* set up right bound for right page: the original page's bound */
	bound = GinDataPageGetRightBound(rpage);
	*bound = oldbound;

	/* WAL record header: split point, total item count, old right bound */
	data.separator = separator;
	data.nitem = maxoff;
	data.rightbound = oldbound;

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char *) &data;
	rdata[0].len = sizeof(ginxlogSplitData);
	rdata[0].next = &rdata[1];

	/* WAL payload: the complete item vector (both halves) */
	rdata[1].buffer = InvalidBuffer;
	rdata[1].data = vector;
	rdata[1].len = maxoff * sizeofitem;
	rdata[1].next = NULL;

	return lpage;
}
/* * Split entry page and insert new data. * * Returns new temp pages to *newlpage and *newrpage. * The original buffer is left untouched. */ static void entrySplitPage(GinBtree btree, Buffer origbuf, GinBtreeStack *stack, GinBtreeEntryInsertData *insertData, BlockNumber updateblkno, Page *newlpage, Page *newrpage) { OffsetNumber off = stack->off; OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; char *ptr; IndexTuple itup; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Size pageSize = PageGetPageSize(lpage); char tupstore[2 * BLCKSZ]; entryPreparePage(btree, lpage, off, insertData, updateblkno); /* * First, append all the existing tuples and the new tuple we're inserting * one after another in a temporary workspace. */ maxoff = PageGetMaxOffsetNumber(lpage); ptr = tupstore; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); size = MAXALIGN(IndexTupleSize(itup)); memcpy(ptr, itup, size); ptr += size; totalsize += size + sizeof(ItemIdData); } if (off == maxoff + 1) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } /* * Initialize the left and right pages, and copy all the tuples back to * them. */ GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); ptr = tupstore; maxoff++; lsize = 0; page = lpage; for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) ptr; /* * Decide where to split. We try to equalize the pages' total data * size, not number of tuples. 
*/ if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) separator = i - 1; page = rpage; } else { lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); ptr += MAXALIGN(IndexTupleSize(itup)); } /* return temp pages to caller */ *newlpage = lpage; *newrpage = rpage; }
/*
 * Insert a tuple into an entry page that must be split, and build the WAL
 * data describing the split.
 *
 * The page in origbuf is left untouched.  Two temporary copies of it are
 * rebuilt as the left and right halves and returned through *newlpage and
 * *newrpage.  Tuples are distributed between the pages by total size, not
 * by an equal number of tuples.
 *
 * insertPayload is interpreted as a GinBtreeEntryInsertData.  *prdata is
 * set to a static XLogRecData chain (record header followed by the packed
 * tuples); being static, it is valid only until the next call.
 */
static void
entrySplitPage(GinBtree btree, Buffer origbuf,
			   GinBtreeStack *stack,
			   void *insertPayload,
			   BlockNumber updateblkno, XLogRecData **prdata,
			   Page *newlpage, Page *newrpage)
{
	GinBtreeEntryInsertData *insertData = insertPayload;
	OffsetNumber off = stack->off;
	OffsetNumber i,
				maxoff,
				separator = InvalidOffsetNumber;
	Size		totalsize = 0;
	Size		tupstoresize;
	Size		lsize = 0,
				size;
	char	   *ptr;
	IndexTuple	itup;
	Page		page;
	Page		lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
	Page		rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
	Size		pageSize = PageGetPageSize(lpage);

	/* these must be static so they can be returned to caller */
	static XLogRecData rdata[2];
	static ginxlogSplitEntry data;
	static char tupstore[2 * BLCKSZ];

	*prdata = rdata;

	entryPreparePage(btree, lpage, off, insertData, updateblkno);

	/*
	 * First, append all the existing tuples and the new tuple we're inserting
	 * one after another in a temporary workspace.  totalsize additionally
	 * counts one line pointer per tuple, i.e. the space the tuples will take
	 * up on a page.
	 */
	maxoff = PageGetMaxOffsetNumber(lpage);
	ptr = tupstore;
	for (i = FirstOffsetNumber; i <= maxoff; i++)
	{
		if (i == off)
		{
			/* the new tuple goes in front of the existing tuple at 'off' */
			size = MAXALIGN(IndexTupleSize(insertData->entry));
			memcpy(ptr, insertData->entry, size);
			ptr += size;
			totalsize += size + sizeof(ItemIdData);
		}

		itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
		size = MAXALIGN(IndexTupleSize(itup));
		memcpy(ptr, itup, size);
		ptr += size;
		totalsize += size + sizeof(ItemIdData);
	}

	/* the new tuple is appended after the last existing one */
	if (off == maxoff + 1)
	{
		size = MAXALIGN(IndexTupleSize(insertData->entry));
		memcpy(ptr, insertData->entry, size);
		ptr += size;
		totalsize += size + sizeof(ItemIdData);
	}
	/* number of bytes actually written to tupstore (no line pointers) */
	tupstoresize = ptr - tupstore;

	/*
	 * Initialize the left and right pages, and copy all the tuples back to
	 * them.  rpage is initialized first from lpage's flags, so that lpage
	 * can then be initialized from the flags now stored in rpage.
	 */
	GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
	GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);

	ptr = tupstore;
	maxoff++;					/* now counts the new tuple as well */
	lsize = 0;

	page = lpage;
	for (i = FirstOffsetNumber; i <= maxoff; i++)
	{
		itup = (IndexTuple) ptr;

		/*
		 * Switch to the right page once the left page holds more than half
		 * of the total; separator records the number of tuples on the left.
		 */
		if (lsize > totalsize / 2)
		{
			if (separator == InvalidOffsetNumber)
				separator = i - 1;
			page = rpage;
		}
		else
		{
			lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
		}

		if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
			elog(ERROR, "failed to add item to index page in \"%s\"",
				 RelationGetRelationName(btree->index));
		ptr += MAXALIGN(IndexTupleSize(itup));
	}

	/* WAL record header: split point and total number of tuples */
	data.separator = separator;
	data.nitem = maxoff;

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char *) &data;
	rdata[0].len = sizeof(ginxlogSplitEntry);
	rdata[0].next = &rdata[1];

	/* WAL payload: all tuples of both halves, packed back to back */
	rdata[1].buffer = InvalidBuffer;
	rdata[1].data = tupstore;
	rdata[1].len = tupstoresize;
	rdata[1].next = NULL;

	*newlpage = lpage;
	*newrpage = rpage;
}
/* -------------------------------------------------
 * GetBTPageStatistics()
 *
 * Collect statistics of single b-tree page
 *
 * blkno	- block number, stored into stat->blkno as given
 * buffer	- buffer holding the page to examine
 * stat		- output; every field is assigned on every return path
 *
 * stat->type is 'd' (deleted), 'e' (to be ignored), 'l' (leaf), 'r' (root)
 * or 'i' (anything else).  For a deleted page stat->btpo.xact is filled in
 * and no items are counted (live_items, dead_items and avg_item_size are
 * zero); for all other pages stat->btpo.level is filled in and the line
 * pointers are scanned.
 * -------------------------------------------------
 */
static void
GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
{
	Page		page = BufferGetPage(buffer);
	PageHeader	phdr = (PageHeader) page;
	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	int			item_size = 0;
	int			off;

	stat->blkno = blkno;

	/* page size minus the special space and the page header */
	stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);

	stat->dead_items = stat->live_items = 0;

	stat->page_size = PageGetPageSize(page);

	/*
	 * Fill in everything that does not depend on the page type up front, so
	 * that the early return for deleted pages cannot hand back a struct with
	 * indeterminate fields.
	 */
	stat->btpo_prev = opaque->btpo_prev;
	stat->btpo_next = opaque->btpo_next;
	stat->btpo_flags = opaque->btpo_flags;
	stat->btpo_cycleid = opaque->btpo_cycleid;
	stat->free_size = PageGetFreeSpace(page);
	stat->avg_item_size = 0;

	/* page type (flags) */
	if (P_ISDELETED(opaque))
	{
		/* btpo is a union: a deleted page carries an xact, not a level */
		stat->type = 'd';
		stat->btpo.xact = opaque->btpo.xact;
		return;
	}
	else if (P_IGNORE(opaque))
		stat->type = 'e';
	else if (P_ISLEAF(opaque))
		stat->type = 'l';
	else if (P_ISROOT(opaque))
		stat->type = 'r';
	else
		stat->type = 'i';

	stat->btpo.level = opaque->btpo.level;

	/* count live and dead tuples, and sum up the tuple sizes of both */
	for (off = FirstOffsetNumber; off <= maxoff; off++)
	{
		IndexTuple	itup;

		ItemId		id = PageGetItemId(page, off);

		itup = (IndexTuple) PageGetItem(page, id);

		item_size += IndexTupleSize(itup);

		if (!ItemIdIsDead(id))
			stat->live_items++;
		else
			stat->dead_items++;
	}

	/* average over live and dead items alike; guard the empty page */
	if ((stat->live_items + stat->dead_items) > 0)
		stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
}
static const char * convert_gpdb4_heap_page(char *page) { VERSION4_PageHeaderData *oldhdr; PageHeader newhdr; OffsetNumber off; OffsetNumber maxoff; if (PageGetPageSize(page) != BLCKSZ) return "invalid block size on page"; /* Can only convert from GPDB4 format */ if (PageGetPageLayoutVersion(page) != 4) return "invalid page version"; oldhdr = (VERSION4_PageHeaderData *) page; /* Other checks that PageHeaderIsValid() normally performs */ if (!((oldhdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && oldhdr->pd_lower >= SizeOfPageHeaderData && oldhdr->pd_lower <= oldhdr->pd_upper && oldhdr->pd_upper <= oldhdr->pd_special && oldhdr->pd_special <= BLCKSZ && oldhdr->pd_special == MAXALIGN(oldhdr->pd_special))) return "invalid header"; /* * Ok, it's a valid heap page, from GPDB4. We know how to convert that! */ /* 1. pd_prune_xid was added to page header */ /* 2. Line pointer flags were changed */ /* 3. HEAP_COMPRESSED flag was removed */ /* 4. On-disk representation of numerics was changed */ /* * Also, money datatype was widened from 32 to 64 bits. (pg_upgrade * should've refused the upgrade) */ /* * First, check if there is enough space on the page, after we expand the header. */ oldhdr = (VERSION4_PageHeaderData *) page; newhdr = (PageHeader) page; /* * If there isn't enough space on this page for the new header field, relocate a tuple */ make_room(page); /* * There is space. Move the line pointers. We also convert the line pointer flags * while we're at it. Begin from end to beginning, so that we don't overwrite items * we haven't processed yet. */ maxoff = (oldhdr->pd_lower - VERSION4_SizeOfPageHeaderData) / sizeof(ItemIdData); /* PageGetMaxOffsetNumber */ for (off = maxoff; off >= 1; off--) { ItemIdData iid = oldhdr->pd_linp[off - 1]; /* PageGetItemId */ if (iid.lp_flags == VERSION4_LP_UNUSED) iid.lp_flags = LP_UNUSED; else if (iid.lp_flags == VERSION4_LP_USED) iid.lp_flags = LP_NORMAL; else { /* LP_DELETE and LP_USED were never used on heap pages. 
*/ return "unexpected LP_DELETE or LP_DEAD line pointer on old-format heap page"; } newhdr->pd_linp[off - 1] = iid; } newhdr->pd_lower = (char *) &newhdr->pd_linp[maxoff] - (char *) page; /* Initialize the field that was added after version 4 format */ newhdr->pd_prune_xid = 0; /* * Ok, the page header and line pointers are in the new format now. Mangle * the tuples themselves */ for (off = 1; off <= maxoff; off++) { ItemId iid = PageGetItemId(page, off); /* we can use PageGetItemId now */ HeapTupleHeader htup; if (!ItemIdIsNormal(iid)) continue; htup = (HeapTupleHeader) PageGetItem(page, iid); convert_heaptuple(htup); } /* * Finally, change the version number. */ PageSetPageSizeAndVersion(page, BLCKSZ, TARGET_PAGE_VERSION); return NULL; }
/* * Place tuple and split page, original buffer(lbuf) leaves untouched, * returns shadow page of lbuf filled new data. * Tuples are distributed between pages by equal size on its, not * an equal number! */ static Page entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { static XLogRecData rdata[2]; OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; static char tupstore[2 * BLCKSZ]; char *ptr; IndexTuple itup, leftrightmost = NULL; static ginxlogSplit data; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf)); Page rpage = BufferGetPage(rbuf); Size pageSize = PageGetPageSize(lpage); *prdata = rdata; data.leftChildBlkno = (GinPageIsLeaf(lpage)) ? InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid)); data.updateBlkno = entryPreparePage(btree, lpage, off); maxoff = PageGetMaxOffsetNumber(lpage); ptr = tupstore; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) { size = MAXALIGN(IndexTupleSize(btree->entry)); memcpy(ptr, btree->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); size = MAXALIGN(IndexTupleSize(itup)); memcpy(ptr, itup, size); ptr += size; totalsize += size + sizeof(ItemIdData); } if (off == maxoff + 1) { size = MAXALIGN(IndexTupleSize(btree->entry)); memcpy(ptr, btree->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); ptr = tupstore; maxoff++; lsize = 0; page = lpage; for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) ptr; if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) separator = i - 1; page = rpage; } else { leftrightmost = itup; lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), 
InvalidOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); ptr += MAXALIGN(IndexTupleSize(itup)); } btree->entry = copyIndexTuple(leftrightmost, lpage); ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber); btree->rightblkno = BufferGetBlockNumber(rbuf); data.node = btree->index->rd_node; data.rootBlkno = InvalidBlockNumber; data.lblkno = BufferGetBlockNumber(lbuf); data.rblkno = BufferGetBlockNumber(rbuf); data.separator = separator; data.nitem = maxoff; data.isData = FALSE; data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE; data.isRootSplit = FALSE; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogSplit); rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; rdata[1].data = tupstore; rdata[1].len = MAXALIGN(totalsize); rdata[1].next = NULL; return lpage; }
/*
 * Split a posting-tree (data) page and fill in the WAL record for the split.
 *
 * The page in lbuf itself is left untouched: the function works on a
 * palloc'd copy of it and returns that copy as the new left page.  The page
 * in rbuf is initialized in place and receives the right half.
 *
 * The item(s) to insert come from btree: btree->items[btree->curitem...] on
 * a leaf page (curitem is advanced for every item consumed), btree->pitem on
 * an internal page.  On a rightmost leaf page with the insert position past
 * the last item, as many pending items as fit under the loop's size bound
 * are appended, not just one.  In build mode on a rightmost page the split
 * point is chosen so that the left page is filled, rather than splitting in
 * the middle.
 *
 * Side effects on btree: btree->pitem is set to (lbuf's block, key of the
 * last item on the new left page) and btree->rightblkno to rbuf's block.
 *
 * *prdata is set to a static XLogRecData chain; being static, it is valid
 * only until the next call.
 */
static Page
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
{
	char	   *ptr;
	OffsetNumber separator;
	ItemPointer bound;
	Page		lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
	ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
	int			sizeofitem = GinSizeOfDataPageItem(lpage);
	OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
	Page		rpage = BufferGetPage(rbuf);
	Size		pageSize = PageGetPageSize(lpage);
	Size		freeSpace;

	/* NOTE(review): nCopied is maintained below but never read in this function */
	uint32		nCopied = 1;

	/*
	 * these must be static so they can be returned to caller
	 * (NOTE(review): only rdata[0] and rdata[1] are used below)
	 */
	static ginxlogSplit data;
	static XLogRecData rdata[4];
	static char vector[2 * BLCKSZ];

	/*
	 * Initialize the right page once up front so that the free space of an
	 * empty data page can be measured; it is initialized again further down.
	 */
	GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
	freeSpace = GinDataPageGetFreeSpace(rpage);

	*prdata = rdata;

	/*
	 * NOTE(review): the leaf case stores InvalidOffsetNumber in a field that
	 * otherwise holds a block number — confirm this is what WAL replay
	 * expects.
	 */
	data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
		InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
	data.updateBlkno = dataPrepareData(btree, lpage, off);

	/* Copy all existing items of the left page into the work vector */
	memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
		   maxoff * sizeofitem);

	if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
	{
		/*
		 * Appending at the end of the rightmost leaf: keep consuming pending
		 * items while any remain and the vector stays below twice the
		 * per-page free space less one item.
		 */
		nCopied = 0;
		while (btree->curitem < btree->nitem &&
			   maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
		{
			memcpy(vector + maxoff * sizeof(ItemPointerData),
				   btree->items + btree->curitem,
				   sizeof(ItemPointerData));
			maxoff++;
			nCopied++;
			btree->curitem++;
		}
	}
	else
	{
		/* Insert exactly one item at position 'off', shifting the tail up */
		ptr = vector + (off - 1) * sizeofitem;
		if (maxoff + 1 - off != 0)
			memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
		if (GinPageIsLeaf(lpage))
		{
			memcpy(ptr, btree->items + btree->curitem, sizeofitem);
			btree->curitem++;
		}
		else
			memcpy(ptr, &(btree->pitem), sizeofitem);

		maxoff++;
	}

	/*
	 * We suppose that during index creation the table is scanned from
	 * beginning to end, so ItemPointers are monotonically increasing.
	 * Hence in build mode on the rightmost page the left page is filled
	 * completely; otherwise the items are split evenly by count.
	 */
	if (btree->isBuild && GinPageRightMost(lpage))
		separator = freeSpace / sizeofitem;
	else
		separator = maxoff / 2;

	/*
	 * Re-initialize both pages.  rpage is done first, from lpage's flags, so
	 * that lpage can then be initialized from the flags now stored in rpage.
	 */
	GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
	GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);

	/* First 'separator' items go to the left page, the rest to the right */
	memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
	GinPageGetOpaque(lpage)->maxoff = separator;
	memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
		   vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
	GinPageGetOpaque(rpage)->maxoff = maxoff - separator;

	/* Build the posting item that describes the new left page */
	PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
	if (GinPageIsLeaf(lpage))
		btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
											GinPageGetOpaque(lpage)->maxoff);
	else
		btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
									  GinPageGetOpaque(lpage)->maxoff))->key;
	btree->rightblkno = BufferGetBlockNumber(rbuf);

	/* set up right bound for left page */
	bound = GinDataPageGetRightBound(lpage);
	*bound = btree->pitem.key;

	/* set up right bound for right page */
	bound = GinDataPageGetRightBound(rpage);
	*bound = oldbound;

	/* WAL record header */
	data.node = btree->index->rd_node;
	data.rootBlkno = InvalidBlockNumber;
	data.lblkno = BufferGetBlockNumber(lbuf);
	data.rblkno = BufferGetBlockNumber(rbuf);
	data.separator = separator;
	data.nitem = maxoff;
	data.isData = TRUE;
	data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
	data.isRootSplit = FALSE;
	data.rightbound = oldbound;

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char *) &data;
	rdata[0].len = sizeof(ginxlogSplit);
	rdata[0].next = &rdata[1];

	/* WAL payload: the complete item vector (both halves), length MAXALIGN'd */
	rdata[1].buffer = InvalidBuffer;
	rdata[1].data = vector;
	rdata[1].len = MAXALIGN(maxoff * sizeofitem);
	rdata[1].next = NULL;

	return lpage;
}
/*
 * Place the pending tuple (btree->entry) on an entry page that must be
 * split.
 *
 * lPage is not modified: a temporary copy of it is rebuilt as the new left
 * page and returned.  rPage is initialized in place and receives the right
 * half.  Tuples are distributed between the pages by total size, not by an
 * equal number of tuples.
 *
 * lbuf and rbuf are used only for their block numbers.
 *
 * Side effects on btree: btree->entry is replaced by an interior tuple
 * formed from the last tuple placed on the left page, and btree->rightblkno
 * is set to rbuf's block.
 *
 * The tuple workspace is static, so the function is not reentrant.
 */
static Page
entrySplitPage(RumBtree btree, Buffer lbuf, Buffer rbuf,
			   Page lPage, Page rPage, OffsetNumber off)
{
	OffsetNumber i,
				maxoff,
				separator = InvalidOffsetNumber;
	Size		totalsize = 0;
	Size		lsize = 0,
				size;
	char	   *ptr;
	IndexTuple	itup,
				leftrightmost = NULL;
	Page		page;
	Page		newlPage = PageGetTempPageCopy(lPage);
	Size		pageSize = PageGetPageSize(newlPage);

	static char tupstore[2 * BLCKSZ];

	entryPreparePage(btree, newlPage, off);

	/*
	 * Pack all existing tuples, with the new tuple slotted in at 'off', back
	 * to back into the workspace.  totalsize additionally counts one line
	 * pointer per tuple, i.e. the space the tuples will take up on a page.
	 */
	maxoff = PageGetMaxOffsetNumber(newlPage);
	ptr = tupstore;

	for (i = FirstOffsetNumber; i <= maxoff; i++)
	{
		if (i == off)
		{
			size = MAXALIGN(IndexTupleSize(btree->entry));
			memcpy(ptr, btree->entry, size);
			ptr += size;
			totalsize += size + sizeof(ItemIdData);
		}

		itup = (IndexTuple) PageGetItem(newlPage, PageGetItemId(newlPage, i));
		size = MAXALIGN(IndexTupleSize(itup));
		memcpy(ptr, itup, size);
		ptr += size;
		totalsize += size + sizeof(ItemIdData);
	}

	/*
	 * The new tuple is appended after the last existing one.  ptr is not
	 * advanced here; it is reset to the start of the workspace below.
	 */
	if (off == maxoff + 1)
	{
		size = MAXALIGN(IndexTupleSize(btree->entry));
		memcpy(ptr, btree->entry, size);
		totalsize += size + sizeof(ItemIdData);
	}

	/*
	 * Re-initialize both pages.  rPage is done first, from newlPage's flags,
	 * so that newlPage can then be initialized from the flags now stored in
	 * rPage.
	 */
	RumInitPage(rPage, RumPageGetOpaque(newlPage)->flags, pageSize);
	RumInitPage(newlPage, RumPageGetOpaque(rPage)->flags, pageSize);

	ptr = tupstore;
	maxoff++;					/* now counts the new tuple as well */
	lsize = 0;

	page = newlPage;
	for (i = FirstOffsetNumber; i <= maxoff; i++)
	{
		itup = (IndexTuple) ptr;

		/*
		 * Switch to the right page once the left page holds more than half
		 * of the total.  separator is recorded but not used afterwards in
		 * this function.
		 */
		if (lsize > totalsize / 2)
		{
			if (separator == InvalidOffsetNumber)
				separator = i - 1;
			page = rPage;
		}
		else
		{
			/* remember the last tuple that went to the left page */
			leftrightmost = itup;
			lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
		}

		if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
			elog(ERROR, "failed to add item to index page in \"%s\"",
				 RelationGetRelationName(btree->index));
		ptr += MAXALIGN(IndexTupleSize(itup));
	}

	/* hand the new downlink key and right sibling back through btree */
	btree->entry = RumFormInteriorTuple(btree, leftrightmost, newlPage,
										BufferGetBlockNumber(lbuf));

	btree->rightblkno = BufferGetBlockNumber(rbuf);

	return newlPage;
}