/*
 * Add btree->entry to the page held in 'buf' at offset 'off', and assemble
 * the WAL record data that describes the insertion.
 *
 * *prdata is pointed at a static XLogRecData chain, so the chain is
 * overwritten by the next call.  btree->entry is reset to NULL before
 * returning.  Raises ERROR if PageAddItem does not place the tuple at 'off'.
 */
static void
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
{
	/* static, because the caller keeps using them through *prdata */
	static XLogRecData rdata[3];
	static ginxlogInsert data;

	Page		page = BufferGetPage(buf);
	XLogRecData *cur = rdata;
	OffsetNumber landed;

	*prdata = rdata;

	data.updateBlkno = entryPreparePage(btree, page, off);

	landed = PageAddItem(page, (Item) btree->entry,
						 IndexTupleSize(btree->entry), off, false, false);
	if (landed != off)
		elog(ERROR, "failed to add item to index page in \"%s\"",
			 RelationGetRelationName(btree->index));

	/* fixed-size header of the WAL record */
	data.node = btree->index->rd_node;
	data.blkno = BufferGetBlockNumber(buf);
	data.offset = off;
	data.nitem = 1;
	data.isDelete = btree->isDelete;
	data.isData = false;
	data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;

	/*
	 * Prevent a full-page write if a child's split occurred.  That is needed
	 * to remove incomplete splits while replaying WAL.
	 *
	 * data.updateBlkno contains the block number of the newly created right
	 * page of a recently split page.  Only when there is none do we attach
	 * the buffer itself to the record.
	 */
	if (data.updateBlkno == InvalidBlockNumber)
	{
		cur->buffer = buf;
		cur->buffer_std = TRUE;
		cur->data = NULL;
		cur->len = 0;
		cur->next = cur + 1;
		cur++;
	}

	/* the ginxlogInsert header */
	cur->buffer = InvalidBuffer;
	cur->data = (char *) &data;
	cur->len = sizeof(ginxlogInsert);
	cur->next = cur + 1;
	cur++;

	/* the inserted tuple itself; this terminates the chain */
	cur->buffer = InvalidBuffer;
	cur->data = (char *) btree->entry;
	cur->len = IndexTupleSize(btree->entry);
	cur->next = NULL;

	btree->entry = NULL;
}
/* * Place tuple on page and fills WAL record * * If the tuple doesn't fit, returns false without modifying the page. * * On insertion to an internal node, in addition to inserting the given item, * the downlink of the existing item at 'off' is updated to point to * 'updateblkno'. */ static GinPlaceToPageRC entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, void *insertPayload, BlockNumber updateblkno, XLogRecData **prdata, Page *newlpage, Page *newrpage) { GinBtreeEntryInsertData *insertData = insertPayload; Page page = BufferGetPage(buf); OffsetNumber off = stack->off; OffsetNumber placed; int cnt = 0; /* these must be static so they can be returned to caller */ static XLogRecData rdata[3]; static ginxlogInsertEntry data; /* quick exit if it doesn't fit */ if (!entryIsEnoughSpace(btree, buf, off, insertData)) { entrySplitPage(btree, buf, stack, insertPayload, updateblkno, prdata, newlpage, newrpage); return SPLIT; } START_CRIT_SECTION(); *prdata = rdata; entryPreparePage(btree, page, off, insertData, updateblkno); placed = PageAddItem(page, (Item) insertData->entry, IndexTupleSize(insertData->entry), off, false, false); if (placed != off) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); data.isDelete = insertData->isDelete; data.offset = off; rdata[cnt].buffer = buf; rdata[cnt].buffer_std = true; rdata[cnt].data = (char *) &data; rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple); rdata[cnt].next = &rdata[cnt + 1]; cnt++; rdata[cnt].buffer = buf; rdata[cnt].buffer_std = true; rdata[cnt].data = (char *) insertData->entry; rdata[cnt].len = IndexTupleSize(insertData->entry); rdata[cnt].next = NULL; return INSERTED; }
/*
 * Place btree->entry on 'page' at offset 'off'.
 *
 * The page is first run through entryPreparePage().  btree->entry is reset
 * to NULL once the tuple has been added.  Raises ERROR if PageAddItem does
 * not place the tuple at 'off'.  No WAL data is assembled in this function.
 */
static void
entryPlaceToPage(RumBtree btree, Page page, OffsetNumber off)
{
	OffsetNumber landed;

	entryPreparePage(btree, page, off);

	landed = PageAddItem(page, (Item) btree->entry,
						 IndexTupleSize(btree->entry), off, false, false);
	if (landed != off)
		elog(ERROR, "failed to add item to index page in \"%s\"",
			 RelationGetRelationName(btree->index));

	btree->entry = NULL;
}
/* * Place tuple on page and fills WAL record * * If the tuple doesn't fit, returns false without modifying the page. * * On insertion to an internal node, in addition to inserting the given item, * the downlink of the existing item at 'off' is updated to point to * 'updateblkno'. * * On INSERTED, registers the buffer as buffer ID 0, with data. * On SPLIT, returns rdata that represents the split pages in *prdata. */ static GinPlaceToPageRC entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, void *insertPayload, BlockNumber updateblkno, Page *newlpage, Page *newrpage) { GinBtreeEntryInsertData *insertData = insertPayload; Page page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); OffsetNumber off = stack->off; OffsetNumber placed; /* this must be static so it can be returned to caller. */ static ginxlogInsertEntry data; /* quick exit if it doesn't fit */ if (!entryIsEnoughSpace(btree, buf, off, insertData)) { entrySplitPage(btree, buf, stack, insertPayload, updateblkno, newlpage, newrpage); return SPLIT; } START_CRIT_SECTION(); entryPreparePage(btree, page, off, insertData, updateblkno); placed = PageAddItem(page, (Item) insertData->entry, IndexTupleSize(insertData->entry), off, false, false); if (placed != off) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); if (RelationNeedsWAL(btree->index)) { data.isDelete = insertData->isDelete; data.offset = off; XLogBeginInsert(); XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) &data, offsetof(ginxlogInsertEntry, tuple)); XLogRegisterBufData(0, (char *) insertData->entry, IndexTupleSize(insertData->entry)); } return INSERTED; }
/*
 * Carry out the insertion once beginPlaceToPage has decided the tuple fits.
 *
 * This runs inside a critical section, and XLOG record creation (if needed)
 * has already been started, with the target buffer registered in slot 0.
 * 'ptp_workspace' is not used by this function.
 */
static void
entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
					 void *insertPayload, BlockNumber updateblkno,
					 void *ptp_workspace)
{
	/*
	 * This must be static, because it has to survive until XLogInsert, and
	 * we can't palloc here.  Ugly, but the XLogInsert infrastructure isn't
	 * reentrant anyway.
	 */
	static ginxlogInsertEntry data;

	GinBtreeEntryInsertData *insertData = insertPayload;
	Page		page = BufferGetPage(buf);
	OffsetNumber off = stack->off;
	OffsetNumber landed;

	entryPreparePage(btree, page, off, insertData, updateblkno);

	landed = PageAddItem(page, (Item) insertData->entry,
						 IndexTupleSize(insertData->entry), off, false, false);
	if (landed != off)
		elog(ERROR, "failed to add item to index page in \"%s\"",
			 RelationGetRelationName(btree->index));

	/* nothing to register when the relation is not WAL-logged */
	if (!RelationNeedsWAL(btree->index))
		return;

	data.isDelete = insertData->isDelete;
	data.offset = off;

	/* header up to (not including) the tuple field, then the tuple */
	XLogRegisterBufData(0, (char *) &data,
						offsetof(ginxlogInsertEntry, tuple));
	XLogRegisterBufData(0, (char *) insertData->entry,
						IndexTupleSize(insertData->entry));
}
/* * Split entry page and insert new data. * * Returns new temp pages to *newlpage and *newrpage. * The original buffer is left untouched. */ static void entrySplitPage(GinBtree btree, Buffer origbuf, GinBtreeStack *stack, GinBtreeEntryInsertData *insertData, BlockNumber updateblkno, Page *newlpage, Page *newrpage) { OffsetNumber off = stack->off; OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; char *ptr; IndexTuple itup; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Size pageSize = PageGetPageSize(lpage); char tupstore[2 * BLCKSZ]; entryPreparePage(btree, lpage, off, insertData, updateblkno); /* * First, append all the existing tuples and the new tuple we're inserting * one after another in a temporary workspace. */ maxoff = PageGetMaxOffsetNumber(lpage); ptr = tupstore; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); size = MAXALIGN(IndexTupleSize(itup)); memcpy(ptr, itup, size); ptr += size; totalsize += size + sizeof(ItemIdData); } if (off == maxoff + 1) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } /* * Initialize the left and right pages, and copy all the tuples back to * them. */ GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); ptr = tupstore; maxoff++; lsize = 0; page = lpage; for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) ptr; /* * Decide where to split. We try to equalize the pages' total data * size, not number of tuples. 
*/ if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) separator = i - 1; page = rpage; } else { lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); ptr += MAXALIGN(IndexTupleSize(itup)); } /* return temp pages to caller */ *newlpage = lpage; *newrpage = rpage; }
/* * Place tuple and split page, original buffer(lbuf) leaves untouched, * returns shadow pages filled with new data. * Tuples are distributed between pages by equal size on its, not * an equal number! */ static void entrySplitPage(GinBtree btree, Buffer origbuf, GinBtreeStack *stack, void *insertPayload, BlockNumber updateblkno, XLogRecData **prdata, Page *newlpage, Page *newrpage) { GinBtreeEntryInsertData *insertData = insertPayload; OffsetNumber off = stack->off; OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size tupstoresize; Size lsize = 0, size; char *ptr; IndexTuple itup; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Size pageSize = PageGetPageSize(lpage); /* these must be static so they can be returned to caller */ static XLogRecData rdata[2]; static ginxlogSplitEntry data; static char tupstore[2 * BLCKSZ]; *prdata = rdata; entryPreparePage(btree, lpage, off, insertData, updateblkno); /* * First, append all the existing tuples and the new tuple we're inserting * one after another in a temporary workspace. */ maxoff = PageGetMaxOffsetNumber(lpage); ptr = tupstore; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); size = MAXALIGN(IndexTupleSize(itup)); memcpy(ptr, itup, size); ptr += size; totalsize += size + sizeof(ItemIdData); } if (off == maxoff + 1) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } tupstoresize = ptr - tupstore; /* * Initialize the left and right pages, and copy all the tuples back to * them. 
*/ GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); ptr = tupstore; maxoff++; lsize = 0; page = lpage; for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) ptr; if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) separator = i - 1; page = rpage; } else { lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); ptr += MAXALIGN(IndexTupleSize(itup)); } data.separator = separator; data.nitem = maxoff; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogSplitEntry); rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; rdata[1].data = tupstore; rdata[1].len = tupstoresize; rdata[1].next = NULL; *newlpage = lpage; *newrpage = rpage; }
/* * Place tuple and split page, original buffer(lbuf) leaves untouched, * returns shadow page of lbuf filled new data. * Tuples are distributed between pages by equal size on its, not * an equal number! */ static Page entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { static XLogRecData rdata[2]; OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; static char tupstore[2 * BLCKSZ]; char *ptr; IndexTuple itup, leftrightmost = NULL; static ginxlogSplit data; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf)); Page rpage = BufferGetPage(rbuf); Size pageSize = PageGetPageSize(lpage); *prdata = rdata; data.leftChildBlkno = (GinPageIsLeaf(lpage)) ? InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid)); data.updateBlkno = entryPreparePage(btree, lpage, off); maxoff = PageGetMaxOffsetNumber(lpage); ptr = tupstore; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) { size = MAXALIGN(IndexTupleSize(btree->entry)); memcpy(ptr, btree->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); size = MAXALIGN(IndexTupleSize(itup)); memcpy(ptr, itup, size); ptr += size; totalsize += size + sizeof(ItemIdData); } if (off == maxoff + 1) { size = MAXALIGN(IndexTupleSize(btree->entry)); memcpy(ptr, btree->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); ptr = tupstore; maxoff++; lsize = 0; page = lpage; for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) ptr; if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) separator = i - 1; page = rpage; } else { leftrightmost = itup; lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), 
InvalidOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); ptr += MAXALIGN(IndexTupleSize(itup)); } btree->entry = copyIndexTuple(leftrightmost, lpage); ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber); btree->rightblkno = BufferGetBlockNumber(rbuf); data.node = btree->index->rd_node; data.rootBlkno = InvalidBlockNumber; data.lblkno = BufferGetBlockNumber(lbuf); data.rblkno = BufferGetBlockNumber(rbuf); data.separator = separator; data.nitem = maxoff; data.isData = FALSE; data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE; data.isRootSplit = FALSE; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogSplit); rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; rdata[1].data = tupstore; rdata[1].len = MAXALIGN(totalsize); rdata[1].next = NULL; return lpage; }
/* * Place tuple and split page, original buffer(lbuf) leaves untouched, * returns shadow page of lbuf filled new data. * Tuples are distributed between pages by equal size on its, not * an equal number! */ static Page entrySplitPage(RumBtree btree, Buffer lbuf, Buffer rbuf, Page lPage, Page rPage, OffsetNumber off) { OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; char *ptr; IndexTuple itup, leftrightmost = NULL; Page page; Page newlPage = PageGetTempPageCopy(lPage); Size pageSize = PageGetPageSize(newlPage); static char tupstore[2 * BLCKSZ]; entryPreparePage(btree, newlPage, off); maxoff = PageGetMaxOffsetNumber(newlPage); ptr = tupstore; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) { size = MAXALIGN(IndexTupleSize(btree->entry)); memcpy(ptr, btree->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } itup = (IndexTuple) PageGetItem(newlPage, PageGetItemId(newlPage, i)); size = MAXALIGN(IndexTupleSize(itup)); memcpy(ptr, itup, size); ptr += size; totalsize += size + sizeof(ItemIdData); } if (off == maxoff + 1) { size = MAXALIGN(IndexTupleSize(btree->entry)); memcpy(ptr, btree->entry, size); totalsize += size + sizeof(ItemIdData); } RumInitPage(rPage, RumPageGetOpaque(newlPage)->flags, pageSize); RumInitPage(newlPage, RumPageGetOpaque(rPage)->flags, pageSize); ptr = tupstore; maxoff++; lsize = 0; page = newlPage; for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) ptr; if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) separator = i - 1; page = rPage; } else { leftrightmost = itup; lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); ptr += MAXALIGN(IndexTupleSize(itup)); } btree->entry = RumFormInteriorTuple(btree, 
leftrightmost, newlPage, BufferGetBlockNumber(lbuf)); btree->rightblkno = BufferGetBlockNumber(rbuf); return newlPage; }