/*
 * Read in a buffer, using bulk-insert strategy if bistate isn't NULL.
 */
static Buffer
ReadBufferBI(Relation relation, BlockNumber targetBlock,
			 BulkInsertState bistate)
{
	Buffer		buffer;

	/* If not bulk-insert, exactly like ReadBuffer */
	if (!bistate)
		return ReadBuffer(relation, targetBlock);

	/* If we have the desired block already pinned, re-pin and return it */
	if (bistate->current_buf != InvalidBuffer)
	{
		if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
		{
			IncrBufferRefCount(bistate->current_buf);
			return bistate->current_buf;
		}
		/* ... else drop the old buffer */
		ReleaseBuffer(bistate->current_buf);
		bistate->current_buf = InvalidBuffer;
	}

	/* Perform a read using the buffer strategy */
	buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
								RBM_NORMAL, bistate->strategy);

	/* Save the selected block as target for future inserts */
	IncrBufferRefCount(buffer);
	bistate->current_buf = buffer;

	return buffer;
}
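A minimal usage sketch, not part of the source above: ReadBufferBI() is reached indirectly when a caller passes a BulkInsertState into the heap-insert path. GetBulkInsertState(), FreeBulkInsertState(), GetCurrentCommandId() and this heap_insert() signature are assumed from PostgreSQL of the same era; rel, tuples, ntuples and i are hypothetical locals.

/* Illustrative only: insert many tuples, reusing the pinned target page. */
BulkInsertState bistate = GetBulkInsertState();
int			i;

for (i = 0; i < ntuples; i++)
	heap_insert(rel, tuples[i], GetCurrentCommandId(true), 0, bistate);

FreeBulkInsertState(bistate);	/* drops the pin kept in bistate->current_buf */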
static GinScanKey
copyScanKeys(GinScanKey keys, uint32 nkeys)
{
	GinScanKey	newkeys;
	uint32		i,
				j;

	newkeys = (GinScanKey) palloc(sizeof(GinScanKeyData) * nkeys);
	memcpy(newkeys, keys, sizeof(GinScanKeyData) * nkeys);

	for (i = 0; i < nkeys; i++)
	{
		newkeys[i].scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData) * keys[i].nentries);
		memcpy(newkeys[i].scanEntry, keys[i].scanEntry,
			   sizeof(GinScanEntryData) * keys[i].nentries);

		for (j = 0; j < keys[i].nentries; j++)
		{
			if (keys[i].scanEntry[j].buffer != InvalidBuffer)
				IncrBufferRefCount(keys[i].scanEntry[j].buffer);
			if (keys[i].scanEntry[j].master)
			{
				int			masterN = keys[i].scanEntry[j].master - keys[i].scanEntry;

				newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
			}
		}
	}

	return newkeys;
}
Datum
gistrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	GISTScanOpaque so;
	GISTSearchStack *o,
			   *n,
			   *tmp;

	scan->currentItemData = scan->currentMarkData;

	so = (GISTScanOpaque) scan->opaque;
	if (so->flags & GS_MRKBEFORE)
		so->flags |= GS_CURBEFORE;
	else
		so->flags &= ~GS_CURBEFORE;

	o = NULL;
	n = so->markstk;

	/* copy the parent stack from the current item data */
	while (n != NULL)
	{
		tmp = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
		tmp->lsn = n->lsn;
		tmp->parentlsn = n->parentlsn;
		tmp->block = n->block;
		tmp->next = o;
		o = tmp;
		n = n->next;
	}

	gistfreestack(so->stack);
	so->stack = o;

	/* Update curbuf: be sure to bump ref count on markbuf */
	if (BufferIsValid(so->curbuf))
	{
		ReleaseBuffer(so->curbuf);
		so->curbuf = InvalidBuffer;
	}
	if (BufferIsValid(so->markbuf))
	{
		IncrBufferRefCount(so->markbuf);
		so->curbuf = so->markbuf;
	}

	so->nPageData = so->markNPageData;
	so->curPageData = so->markNPageData;
	if (so->markNPageData > 0)
		memcpy(so->pageData, so->markPageData,
			   sizeof(MatchedItemPtr) * so->markNPageData);

	PG_RETURN_VOID();
}
/*
 *	btrestrpos() -- restore scan to last saved position
 */
void
btrestrpos(IndexScanDesc scan)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* Restore the marked positions of any array keys */
	if (so->numArrayKeys)
		_bt_restore_array_keys(scan);

	if (so->markItemIndex >= 0)
	{
		/*
		 * The scan has never moved to a new page since the last mark.  Just
		 * restore the itemIndex.
		 *
		 * NB: In this case we can't count on anything in so->markPos to be
		 * accurate.
		 */
		so->currPos.itemIndex = so->markItemIndex;
	}
	else
	{
		/*
		 * The scan moved to a new page after last mark or restore, and we
		 * are now restoring to the marked page.  We aren't holding any read
		 * locks, but if we're still holding the pin for the current
		 * position, we must drop it.
		 */
		if (BTScanPosIsValid(so->currPos))
		{
			/* Before leaving current page, deal with any killed items */
			if (so->numKilled > 0)
				_bt_killitems(scan);
			BTScanPosUnpinIfPinned(so->currPos);
		}

		if (BTScanPosIsValid(so->markPos))
		{
			/* bump pin on mark buffer for assignment to current buffer */
			if (BTScanPosIsPinned(so->markPos))
				IncrBufferRefCount(so->markPos.buf);
			memcpy(&so->currPos, &so->markPos,
				   offsetof(BTScanPosData, items[1]) +
				   so->markPos.lastItem * sizeof(BTScanPosItem));
			if (so->currTuples)
				memcpy(so->currTuples, so->markTuples,
					   so->markPos.nextTupleOffset);
		}
		else
			BTScanPosInvalidate(so->currPos);
	}
}
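A minimal usage sketch, not part of the source above: btrestrpos() is normally reached through the index-AM layer, for example when a merge join rewinds its inner index scan. index_markpos(), index_restrpos() and index_getnext() are the generic indexam entry points; scan is a hypothetical, already-opened IndexScanDesc.

/* Illustrative only: mark the current position, read ahead, then rewind. */
index_markpos(scan);			/* ends up in btmarkpos(), saving currPos */

while (index_getnext(scan, ForwardScanDirection) != NULL)
{
	/* ... examine tuples beyond the mark ... */
}

index_restrpos(scan);			/* btrestrpos() re-pins the marked page */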
/*
 *	btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* Restore the marked positions of any array keys */
	if (so->numArrayKeys)
		_bt_restore_array_keys(scan);

	if (so->markItemIndex >= 0)
	{
		/*
		 * The mark position is on the same page we are currently on.  Just
		 * restore the itemIndex.
		 */
		so->currPos.itemIndex = so->markItemIndex;
	}
	else
	{
		/* we aren't holding any read locks, but gotta drop the pin */
		if (BTScanPosIsValid(so->currPos))
		{
			/* Before leaving current page, deal with any killed items */
			if (so->numKilled > 0 &&
				so->currPos.buf != so->markPos.buf)
				_bt_killitems(scan, false);
			ReleaseBuffer(so->currPos.buf);
			so->currPos.buf = InvalidBuffer;
		}

		if (BTScanPosIsValid(so->markPos))
		{
			/* bump pin on mark buffer for assignment to current buffer */
			IncrBufferRefCount(so->markPos.buf);
			memcpy(&so->currPos, &so->markPos,
				   offsetof(BTScanPosData, items[1]) +
				   so->markPos.lastItem * sizeof(BTScanPosItem));
			if (so->currTuples)
				memcpy(so->currTuples, so->markTuples,
					   so->markPos.nextTupleOffset);
		}
	}

	PG_RETURN_VOID();
}
/*
 *	btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

	if (so->markItemIndex >= 0)
	{
		/*
		 * The mark position is on the same page we are currently on.  Just
		 * restore the itemIndex.
		 */
		so->currPos.itemIndex = so->markItemIndex;
	}
	else
	{
		/* we aren't holding any read locks, but gotta drop the pin */
		if (BTScanPosIsValid(so->currPos))
		{
			/* Before leaving current page, deal with any killed items */
			if (so->numKilled > 0 &&
				so->currPos.buf != so->markPos.buf)
				_bt_killitems(scan, false);
			ReleaseBuffer(so->currPos.buf);
			so->currPos.buf = InvalidBuffer;
		}

		if (BTScanPosIsValid(so->markPos))
		{
			/* bump pin on mark buffer for assignment to current buffer */
			IncrBufferRefCount(so->markPos.buf);
			memcpy(&so->currPos, &so->markPos,
				   offsetof(BTScanPosData, items[1]) +
				   so->markPos.lastItem * sizeof(BTScanPosItem));
		}
	}

	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;

	PG_RETURN_VOID();
}
/* --------------------------------
 *		ExecStoreTuple
 *
 *		This function is used to store a physical tuple into a specified
 *		slot in the tuple table.
 *
 *		tuple:	tuple to store
 *		slot:	slot to store it in
 *		buffer: disk buffer if tuple is in a disk page, else InvalidBuffer
 *		shouldFree: true if ExecClearTuple should pfree() the tuple
 *					when done with it
 *
 * If 'buffer' is not InvalidBuffer, the tuple table code acquires a pin
 * on the buffer which is held until the slot is cleared, so that the tuple
 * won't go away on us.
 *
 * shouldFree is normally set 'true' for tuples constructed on-the-fly.
 * It must always be 'false' for tuples that are stored in disk pages,
 * since we don't want to try to pfree those.
 *
 * Another case where it is 'false' is when the referenced tuple is held
 * in a tuple table slot belonging to a lower-level executor Proc node.
 * In this case the lower-level slot retains ownership and responsibility
 * for eventually releasing the tuple.  When this method is used, we must
 * be certain that the upper-level Proc node will lose interest in the tuple
 * sooner than the lower-level one does!  If you're not certain, copy the
 * lower-level tuple with heap_copytuple and let the upper-level table
 * slot assume ownership of the copy!
 *
 * Return value is just the passed-in slot pointer.
 *
 * NOTE: before PostgreSQL 8.1, this function would accept a NULL tuple
 * pointer and effectively behave like ExecClearTuple (though you could
 * still specify a buffer to pin, which would be an odd combination).
 * This saved a couple lines of code in a few places, but seemed more likely
 * to mask logic errors than to be really useful, so it's now disallowed.
 * --------------------------------
 */
TupleTableSlot *
ExecStoreTuple(HeapTuple tuple,
			   TupleTableSlot *slot,
			   Buffer buffer,
			   bool shouldFree)
{
	/*
	 * sanity checks
	 */
	Assert(tuple != NULL);
	Assert(slot != NULL);
	Assert(slot->tts_tupleDescriptor != NULL);
	/* passing shouldFree=true for a tuple on a disk page is not sane */
	Assert(BufferIsValid(buffer) ? (!shouldFree) : true);

	/*
	 * clear out any old contents of the slot
	 */
	if (!slot->tts_isempty)
		ExecClearTuple(slot);

	/*
	 * store the new tuple into the specified slot.
	 */
	slot->tts_isempty = false;
	slot->tts_shouldFree = shouldFree;
	slot->tts_tuple = tuple;

	/*
	 * If tuple is on a disk page, keep the page pinned as long as we hold a
	 * pointer into it.  We assume the caller already has such a pin.
	 */
	slot->tts_buffer = buffer;
	if (BufferIsValid(buffer))
		IncrBufferRefCount(buffer);

	/* Mark extracted state invalid */
	slot->tts_nvalid = 0;

	return slot;
}
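A minimal usage sketch, not part of the source above: the usual caller pattern in a sequential-scan node stores the fetched tuple together with the buffer that holds it, so the slot keeps the page pinned until it is cleared. heap_getnext() and the rs_cbuf field are assumed from contemporary PostgreSQL sources; scandesc and slot are hypothetical locals.

/* Illustrative only: store a tuple that lives in a disk page. */
HeapTuple	tuple = heap_getnext(scandesc, ForwardScanDirection);

if (tuple != NULL)
	ExecStoreTuple(tuple,				/* tuple to store */
				   slot,				/* slot to store it in */
				   scandesc->rs_cbuf,	/* buffer already pinned by the scan */
				   false);				/* never pfree a disk tuple */
else
	ExecClearTuple(slot);				/* end of scan: mark the slot empty */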
/*
 *	hashrestrpos() -- restore scan to last saved position
 */
Datum
hashrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	Relation	rel = scan->indexRelation;

	/* release pin on current data, if any */
	if (BufferIsValid(so->hashso_curbuf))
		_hash_dropbuf(rel, so->hashso_curbuf);
	so->hashso_curbuf = InvalidBuffer;
	ItemPointerSetInvalid(&(scan->currentItemData));

	/* bump pin count on currentMarkData and copy to currentItemData */
	if (ItemPointerIsValid(&(scan->currentMarkData)))
	{
		IncrBufferRefCount(so->hashso_mrkbuf);
		so->hashso_curbuf = so->hashso_mrkbuf;
		scan->currentItemData = scan->currentMarkData;
	}

	PG_RETURN_VOID();
}
/*
 *	_bt_steppage() -- Step to next page containing valid data for scan
 *
 * On entry, so->currPos.buf must be pinned and read-locked.  We'll drop
 * the lock and pin before moving to next page.
 *
 * On success exit, we hold pin and read-lock on the next interesting page,
 * and so->currPos is updated to contain data from that page.
 *
 * If there are no more matching records in the given direction, we drop all
 * locks and pins, set so->currPos.buf to InvalidBuffer, and return FALSE.
 */
static bool
_bt_steppage(IndexScanDesc scan, ScanDirection dir)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Relation	rel;
	Page		page;
	BTPageOpaque opaque;

	/* we must have the buffer pinned and locked */
	Assert(BufferIsValid(so->currPos.buf));

	/* Before leaving current page, deal with any killed items */
	if (so->numKilled > 0)
		_bt_killitems(scan, true);

	/*
	 * Before we modify currPos, make a copy of the page data if there was a
	 * mark position that needs it.
	 */
	if (so->markItemIndex >= 0)
	{
		/* bump pin on current buffer for assignment to mark buffer */
		IncrBufferRefCount(so->currPos.buf);
		memcpy(&so->markPos, &so->currPos,
			   offsetof(BTScanPosData, items[1]) +
			   so->currPos.lastItem * sizeof(BTScanPosItem));
		if (so->markTuples)
			memcpy(so->markTuples, so->currTuples,
				   so->currPos.nextTupleOffset);
		so->markPos.itemIndex = so->markItemIndex;
		so->markItemIndex = -1;
	}

	rel = scan->indexRelation;

	if (ScanDirectionIsForward(dir))
	{
		/* Walk right to the next page with data */
		/* We must rely on the previously saved nextPage link! */
		BlockNumber blkno = so->currPos.nextPage;

		/* Remember we left a page with data */
		so->currPos.moreLeft = true;

		for (;;)
		{
			/* release the previous buffer */
			_bt_relbuf(rel, so->currPos.buf);
			so->currPos.buf = InvalidBuffer;
			/* if we're at end of scan, give up */
			if (blkno == P_NONE || !so->currPos.moreRight)
				return false;
			/* check for interrupts while we're not holding any buffer lock */
			CHECK_FOR_INTERRUPTS();
			/* step right one page */
			so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
			/* check for deleted page */
			page = BufferGetPage(so->currPos.buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
			{
				PredicateLockPage(rel, blkno, scan->xs_snapshot);
				/* see if there are any matches on this page */
				/* note that this will clear moreRight if we can stop */
				if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
					break;
			}
			/* nope, keep going */
			blkno = opaque->btpo_next;
		}
	}
	else
	{
		/* Remember we left a page with data */
		so->currPos.moreRight = true;

		/*
		 * Walk left to the next page with data.  This is much more complex
		 * than the walk-right case because of the possibility that the page
		 * to our left splits while we are in flight to it, plus the
		 * possibility that the page we were on gets deleted after we leave
		 * it.  See nbtree/README for details.
		 */
		for (;;)
		{
			/* Done if we know there are no matching keys to the left */
			if (!so->currPos.moreLeft)
			{
				_bt_relbuf(rel, so->currPos.buf);
				so->currPos.buf = InvalidBuffer;
				return false;
			}

			/* Step to next physical page */
			so->currPos.buf = _bt_walk_left(rel, so->currPos.buf);

			/* if we're physically at end of index, return failure */
			if (so->currPos.buf == InvalidBuffer)
				return false;

			/*
			 * Okay, we managed to move left to a non-deleted page.  Done if
			 * it's not half-dead and contains matching tuples.  Else loop
			 * back and do it all again.
			 */
			page = BufferGetPage(so->currPos.buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
			{
				PredicateLockPage(rel, BufferGetBlockNumber(so->currPos.buf),
								  scan->xs_snapshot);
				/* see if there are any matches on this page */
				/* note that this will clear moreLeft if we can stop */
				if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
					break;
			}
		}
	}

	return true;
}
/*
 *	rtdosplit -- split a page in the tree.
 *
 *	rtpicksplit does the interesting work of choosing the split.
 *	This routine just does the bit-pushing.
 */
static void
rtdosplit(Relation r,
		  Buffer buffer,
		  RTSTACK *stack,
		  IndexTuple itup,
		  RTSTATE *rtstate)
{
	Page		p;
	Buffer		leftbuf,
				rightbuf;
	Page		left,
				right;
	ItemId		itemid;
	IndexTuple	item;
	IndexTuple	ltup,
				rtup;
	OffsetNumber maxoff;
	OffsetNumber i;
	OffsetNumber leftoff,
				rightoff;
	BlockNumber lbknum,
				rbknum;
	BlockNumber bufblock;
	RTreePageOpaque opaque;
	bool	   *isnull;
	SPLITVEC	v;
	OffsetNumber *spl_left,
			   *spl_right;
	TupleDesc	tupDesc;
	int			n;
	OffsetNumber newitemoff;

	p = (Page) BufferGetPage(buffer);
	opaque = (RTreePageOpaque) PageGetSpecialPointer(p);

	rtpicksplit(r, p, &v, itup, rtstate);

	/*
	 * The root of the tree is the first block in the relation.  If we're
	 * about to split the root, we need to do some hocus-pocus to enforce
	 * this guarantee.
	 */
	if (BufferGetBlockNumber(buffer) == P_ROOT)
	{
		leftbuf = ReadBuffer(r, P_NEW);
		RTInitBuffer(leftbuf, opaque->flags);
		lbknum = BufferGetBlockNumber(leftbuf);
		left = (Page) BufferGetPage(leftbuf);
	}
	else
	{
		leftbuf = buffer;
		IncrBufferRefCount(buffer);
		lbknum = BufferGetBlockNumber(buffer);
		left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData));
	}

	rightbuf = ReadBuffer(r, P_NEW);
	RTInitBuffer(rightbuf, opaque->flags);
	rbknum = BufferGetBlockNumber(rightbuf);
	right = (Page) BufferGetPage(rightbuf);

	spl_left = v.spl_left;
	spl_right = v.spl_right;
	leftoff = rightoff = FirstOffsetNumber;
	maxoff = PageGetMaxOffsetNumber(p);
	newitemoff = OffsetNumberNext(maxoff);

	/*
	 * spl_left contains a list of the offset numbers of the tuples that will
	 * go to the left page.  For each offset number, get the tuple item, then
	 * add the item to the left page.  Similarly for the right side.
	 */

	/* fill left node */
	for (n = 0; n < v.spl_nleft; n++)
	{
		i = *spl_left;
		if (i == newitemoff)
			item = itup;
		else
		{
			itemid = PageGetItemId(p, i);
			item = (IndexTuple) PageGetItem(p, itemid);
		}

		if (PageAddItem(left, (Item) item, IndexTupleSize(item),
						leftoff, LP_USED) == InvalidOffsetNumber)
			elog(ERROR, "failed to add index item to \"%s\"",
				 RelationGetRelationName(r));
		leftoff = OffsetNumberNext(leftoff);

		spl_left++;				/* advance in left split vector */
	}

	/* fill right node */
	for (n = 0; n < v.spl_nright; n++)
	{
		i = *spl_right;
		if (i == newitemoff)
			item = itup;
		else
		{
			itemid = PageGetItemId(p, i);
			item = (IndexTuple) PageGetItem(p, itemid);
		}

		if (PageAddItem(right, (Item) item, IndexTupleSize(item),
						rightoff, LP_USED) == InvalidOffsetNumber)
			elog(ERROR, "failed to add index item to \"%s\"",
				 RelationGetRelationName(r));
		rightoff = OffsetNumberNext(rightoff);

		spl_right++;			/* advance in right split vector */
	}

	/* Make sure we consumed all of the split vectors, and release 'em */
	Assert(*spl_left == InvalidOffsetNumber);
	Assert(*spl_right == InvalidOffsetNumber);
	pfree(v.spl_left);
	pfree(v.spl_right);

	if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT)
		PageRestoreTempPage(left, p);
	WriteBuffer(leftbuf);
	WriteBuffer(rightbuf);

	/*
	 * Okay, the page is split.  We have three things left to do:
	 *
	 * 1) Adjust any active scans on this index to cope with changes we
	 * introduced in its structure by splitting this page.
	 *
	 * 2) "Tighten" the bounding box of the pointer to the left page in the
	 * parent node in the tree, if any.  Since we moved a bunch of stuff off
	 * the left page, we expect it to get smaller.  This happens in the
	 * internal insertion routine.
	 *
	 * 3) Insert a pointer to the right page in the parent.  This may cause
	 * the parent to split.  If it does, we need to repeat steps one and two
	 * for each split node in the tree.
	 */

	/* adjust active scans */
	rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber);

	tupDesc = r->rd_att;
	isnull = (bool *) palloc(r->rd_rel->relnatts * sizeof(bool));
	memset(isnull, false, r->rd_rel->relnatts * sizeof(bool));

	ltup = index_form_tuple(tupDesc, &(v.spl_ldatum), isnull);
	rtup = index_form_tuple(tupDesc, &(v.spl_rdatum), isnull);

	pfree(isnull);
	pfree(DatumGetPointer(v.spl_ldatum));
	pfree(DatumGetPointer(v.spl_rdatum));

	/* set pointers to new child pages in the internal index tuples */
	ItemPointerSet(&(ltup->t_tid), lbknum, 1);
	ItemPointerSet(&(rtup->t_tid), rbknum, 1);

	rtintinsert(r, stack, ltup, rtup, rtstate);

	pfree(ltup);
	pfree(rtup);
}
/*
 *	btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* Restore the marked positions of any array keys */
	if (so->numArrayKeys)
		_bt_restore_array_keys(scan);

	if (so->markItemIndex >= 0)
	{
		/*
		 * The scan has never moved to a new page since the last mark.  Just
		 * restore the itemIndex.
		 *
		 * NB: In this case we can't count on anything in so->markPos to be
		 * accurate.
		 */
		so->currPos.itemIndex = so->markItemIndex;
	}
	else if (so->currPos.currPage == so->markPos.currPage)
	{
		/*
		 * so->markItemIndex < 0 but mark and current positions are on the
		 * same page.  This would be an unusual case, where the scan moved to
		 * a new index page after the mark, restored, and later restored
		 * again without moving off the marked page.  It is not clear that
		 * this code can currently be reached, but it seems better to make
		 * this function robust for this case than to Assert() or elog() that
		 * it can't happen.
		 *
		 * We neither want to set so->markItemIndex >= 0 (because that could
		 * cause a later move to a new page to redo the memcpy() executions)
		 * nor re-execute the memcpy() functions for a restore within the
		 * same page.  The previous restore to this page already set
		 * everything except markPos as it should be.
		 */
		so->currPos.itemIndex = so->markPos.itemIndex;
	}
	else
	{
		/*
		 * The scan moved to a new page after last mark or restore, and we
		 * are now restoring to the marked page.  We aren't holding any read
		 * locks, but if we're still holding the pin for the current
		 * position, we must drop it.
		 */
		if (BTScanPosIsValid(so->currPos))
		{
			/* Before leaving current page, deal with any killed items */
			if (so->numKilled > 0)
				_bt_killitems(scan);
			BTScanPosUnpinIfPinned(so->currPos);
		}

		if (BTScanPosIsValid(so->markPos))
		{
			/* bump pin on mark buffer for assignment to current buffer */
			if (BTScanPosIsPinned(so->markPos))
				IncrBufferRefCount(so->markPos.buf);
			memcpy(&so->currPos, &so->markPos,
				   offsetof(BTScanPosData, items[1]) +
				   so->markPos.lastItem * sizeof(BTScanPosItem));
			if (so->currTuples)
				memcpy(so->currTuples, so->markTuples,
					   so->markPos.nextTupleOffset);
		}
		else
			BTScanPosInvalidate(so->currPos);
	}

	PG_RETURN_VOID();
}
/*
 *	gistSplit -- split a page in the tree.
 */
static IndexTuple *
gistSplit(Relation r,
		  Buffer buffer,
		  IndexTuple *itup,		/* contains compressed entry */
		  int *len,
		  GISTSTATE *giststate,
		  InsertIndexResult *res)
{
	Page		p;
	Buffer		leftbuf,
				rightbuf;
	Page		left,
				right;
	IndexTuple *lvectup,
			   *rvectup,
			   *newtup;
	BlockNumber lbknum,
				rbknum;
	GISTPageOpaque opaque;
	GIST_SPLITVEC v;
	GistEntryVector *entryvec;
	bool	   *decompvec;
	int			i,
				j,
				nlen;
	int			MaxGrpId = 1;
	Datum		datum;
	bool		IsNull;

	p = (Page) BufferGetPage(buffer);
	opaque = (GISTPageOpaque) PageGetSpecialPointer(p);

	/*
	 * The root of the tree is the first block in the relation.  If we're
	 * about to split the root, we need to do some hocus-pocus to enforce
	 * this guarantee.
	 */
	if (BufferGetBlockNumber(buffer) == GISTP_ROOT)
	{
		leftbuf = ReadBuffer(r, P_NEW);
		GISTInitBuffer(leftbuf, opaque->flags);
		lbknum = BufferGetBlockNumber(leftbuf);
		left = (Page) BufferGetPage(leftbuf);
	}
	else
	{
		leftbuf = buffer;
		IncrBufferRefCount(buffer);
		lbknum = BufferGetBlockNumber(buffer);
		left = (Page) PageGetTempPage(p, sizeof(GISTPageOpaqueData));
	}

	rightbuf = ReadBuffer(r, P_NEW);
	GISTInitBuffer(rightbuf, opaque->flags);
	rbknum = BufferGetBlockNumber(rightbuf);
	right = (Page) BufferGetPage(rightbuf);

	/* generate the item array */
	entryvec = palloc(GEVHDRSZ + (*len + 1) * sizeof(GISTENTRY));
	entryvec->n = *len + 1;
	decompvec = (bool *) palloc((*len + 1) * sizeof(bool));
	for (i = 1; i <= *len; i++)
	{
		datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
		gistdentryinit(giststate, 0, &(entryvec->vector[i]),
					   datum, r, p, i,
					   ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
					   FALSE, IsNull);
		if ((!isAttByVal(giststate, 0)) && entryvec->vector[i].key != datum)
			decompvec[i] = TRUE;
		else
			decompvec[i] = FALSE;
	}

	/*
	 * Now let the user-defined picksplit function set up the split vector;
	 * entryvec contains no null values here.
	 */
	FunctionCall2(&giststate->picksplitFn[0],
				  PointerGetDatum(entryvec),
				  PointerGetDatum(&v));

	/* compatibility with old code */
	if (v.spl_left[v.spl_nleft - 1] == InvalidOffsetNumber)
		v.spl_left[v.spl_nleft - 1] = (OffsetNumber) *len;
	if (v.spl_right[v.spl_nright - 1] == InvalidOffsetNumber)
		v.spl_right[v.spl_nright - 1] = (OffsetNumber) *len;

	v.spl_lattr[0] = v.spl_ldatum;
	v.spl_rattr[0] = v.spl_rdatum;
	v.spl_lisnull[0] = false;
	v.spl_risnull[0] = false;

	/*
	 * If the index is multikey, then we must try to get a smaller bounding
	 * box for the subkey(s).
	 */
	if (r->rd_att->natts > 1)
	{
		v.spl_idgrp = (int *) palloc0(sizeof(int) * (*len + 1));
		v.spl_grpflag = (char *) palloc0(sizeof(char) * (*len + 1));
		v.spl_ngrp = (int *) palloc(sizeof(int) * (*len + 1));

		MaxGrpId = gistfindgroup(giststate, entryvec->vector, &v);

		/* form union of sub keys for each page (l,p) */
		gistunionsubkey(r, giststate, itup, &v);

		/*
		 * If possible, distribute the equivalent tuples, controlled by the
		 * penalty on the subkey(s).
		 */
		if (MaxGrpId > 1)
			gistadjsubkey(r, itup, len, &v, giststate);

		pfree(v.spl_idgrp);
		pfree(v.spl_grpflag);
		pfree(v.spl_ngrp);
	}

	/* clean up the entry vector: its keys need to be deleted, too */
	for (i = 1; i <= *len; i++)
		if (decompvec[i])
			pfree(DatumGetPointer(entryvec->vector[i].key));
	pfree(entryvec);
	pfree(decompvec);

	/* form left and right vector */
	lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nleft);
	rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nright);

	for (i = 0; i < v.spl_nleft; i++)
		lvectup[i] = itup[v.spl_left[i] - 1];

	for (i = 0; i < v.spl_nright; i++)
		rvectup[i] = itup[v.spl_right[i] - 1];

	/* write to disk (another split may be needed) */
	if (gistnospace(right, rvectup, v.spl_nright))
	{
		nlen = v.spl_nright;
		newtup = gistSplit(r, rightbuf, rvectup, &nlen, giststate,
			  (res && rvectup[nlen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(rightbuf);
		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_risnull[j])
				pfree(DatumGetPointer(v.spl_rattr[j]));
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber);
		WriteBuffer(rightbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), rbknum, l);

		nlen = 1;
		newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1);
		newtup[0] = gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull);
		ItemPointerSet(&(newtup[0]->t_tid), rbknum, 1);
	}

	if (gistnospace(left, lvectup, v.spl_nleft))
	{
		int			llen = v.spl_nleft;
		IndexTuple *lntup;

		lntup = gistSplit(r, leftbuf, lvectup, &llen, giststate,
			  (res && lvectup[llen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(leftbuf);

		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_lisnull[j])
				pfree(DatumGetPointer(v.spl_lattr[j]));

		newtup = gistjoinvector(newtup, &nlen, lntup, llen);
		pfree(lntup);
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber);
		if (BufferGetBlockNumber(buffer) != GISTP_ROOT)
			PageRestoreTempPage(left, p);
		WriteBuffer(leftbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), lbknum, l);

		nlen += 1;
		newtup = (IndexTuple *) repalloc((void *) newtup, sizeof(IndexTuple) * nlen);
		newtup[nlen - 1] = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull);
		ItemPointerSet(&(newtup[nlen - 1]->t_tid), lbknum, 1);
	}

	/* free working vectors */
	pfree(rvectup);
	pfree(lvectup);
	pfree(v.spl_left);
	pfree(v.spl_right);

	*len = nlen;
	return newtup;
}
/* --------------------------------
 *		ExecStoreTuple
 *
 *		This function is used to store a physical tuple into a specified
 *		slot in the tuple table.
 *
 *		tuple:	tuple to store
 *		slot:	slot to store it in
 *		buffer: disk buffer if tuple is in a disk page, else InvalidBuffer
 *		shouldFree: true if ExecClearTuple should pfree() the tuple
 *					when done with it
 *
 * If 'buffer' is not InvalidBuffer, the tuple table code acquires a pin
 * on the buffer which is held until the slot is cleared, so that the tuple
 * won't go away on us.
 *
 * shouldFree is normally set 'true' for tuples constructed on-the-fly.
 * It must always be 'false' for tuples that are stored in disk pages,
 * since we don't want to try to pfree those.
 *
 * Another case where it is 'false' is when the referenced tuple is held
 * in a tuple table slot belonging to a lower-level executor Proc node.
 * In this case the lower-level slot retains ownership and responsibility
 * for eventually releasing the tuple.  When this method is used, we must
 * be certain that the upper-level Proc node will lose interest in the tuple
 * sooner than the lower-level one does!  If you're not certain, copy the
 * lower-level tuple with heap_copytuple and let the upper-level table
 * slot assume ownership of the copy!
 *
 * Return value is just the passed-in slot pointer.
 *
 * NOTE: before PostgreSQL 8.1, this function would accept a NULL tuple
 * pointer and effectively behave like ExecClearTuple (though you could
 * still specify a buffer to pin, which would be an odd combination).
 * This saved a couple lines of code in a few places, but seemed more likely
 * to mask logic errors than to be really useful, so it's now disallowed.
 * --------------------------------
 */
TupleTableSlot *
ExecStoreHeapTuple(HeapTuple tuple,
				   TupleTableSlot *slot,
				   Buffer buffer,
				   bool shouldFree)
{
	/*
	 * sanity checks
	 */
	Assert(tuple != NULL);
	Assert(slot != NULL);
	Assert(slot->tts_tupleDescriptor != NULL);
	/* passing shouldFree=true for a tuple on a disk page is not sane */
	Assert(BufferIsValid(buffer) ? (!shouldFree) : true);

	/*
	 * Actually we are storing a memtuple!
	 */
	if (is_heaptuple_memtuple(tuple))
	{
		Assert(buffer == InvalidBuffer);
		return ExecStoreMinimalTuple((MemTuple) tuple, slot, shouldFree);
	}

	/*
	 * Free any old physical tuple belonging to the slot.
	 */
	free_heaptuple_memtuple(slot);

	/*
	 * Store the new tuple into the specified slot.
	 */

	/* Clear tts_flags, here isempty set to false */
	slot->PRIVATE_tts_flags = shouldFree ? TTS_SHOULDFREE : 0;

	/* store the tuple */
	slot->PRIVATE_tts_heaptuple = (void *) tuple;

	/* Mark extracted state invalid */
	slot->PRIVATE_tts_nvalid = 0;

	/*
	 * If tuple is on a disk page, keep the page pinned as long as we hold a
	 * pointer into it.  We assume the caller already has such a pin.
	 *
	 * This is coded to optimize the case where the slot previously held a
	 * tuple on the same disk page: in that case releasing and re-acquiring
	 * the pin is a waste of cycles.  This is a common situation during
	 * seqscans, so it's worth troubling over.
	 */
	if (slot->tts_buffer != buffer)
	{
		if (BufferIsValid(slot->tts_buffer))
			ReleaseBuffer(slot->tts_buffer);
		slot->tts_buffer = buffer;
		if (BufferIsValid(buffer))
			IncrBufferRefCount(buffer);
	}

	return slot;
}
/*
 * Start* functions set up the state of a search: they find the correct
 * buffer and lock it.  Stop* functions unlock the buffer (but don't
 * release it!).
 */
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
{
	GinBtreeData btreeEntry;
	GinBtreeStack *stackEntry;
	Page		page;
	bool		needUnlock = TRUE;

	if (entry->master != NULL)
	{
		entry->isFinished = entry->master->isFinished;
		return;
	}

	/*
	 * We should find the entry, and then either begin a scan of its posting
	 * tree or just store its posting list in memory.
	 */
	prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
	btreeEntry.searchMode = TRUE;
	stackEntry = ginFindLeafPage(&btreeEntry, NULL);
	page = BufferGetPage(stackEntry->buffer);

	entry->isFinished = TRUE;
	entry->buffer = InvalidBuffer;
	entry->offset = InvalidOffsetNumber;
	entry->list = NULL;
	entry->nlist = 0;
	entry->reduceResult = FALSE;
	entry->predictNumberResult = 0;

	if (btreeEntry.findItem(&btreeEntry, stackEntry))
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));

		if (GinIsPostingTree(itup))
		{
			BlockNumber rootPostingTree = GinGetPostingTree(itup);
			GinPostingTreeScan *gdi;
			Page		page;

			LockBuffer(stackEntry->buffer, GIN_UNLOCK);
			needUnlock = FALSE;
			gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);

			entry->buffer = scanBeginPostingTree(gdi);

			/*
			 * We keep the buffer pinned because we need to prevent deletion
			 * of the page during the scan; see GIN's vacuum implementation.
			 * The refcount is increased so the buffer stays pinned after the
			 * freeGinBtreeStack() call.
			 */
			IncrBufferRefCount(entry->buffer);

			page = BufferGetPage(entry->buffer);
			entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;

			/*
			 * Keep the page content in memory so we don't have to hold the
			 * page lock for a long time.
			 */
			entry->list = (ItemPointerData *) palloc(BLCKSZ);
			entry->nlist = GinPageGetOpaque(page)->maxoff;
			memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
				   GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));

			LockBuffer(entry->buffer, GIN_UNLOCK);
			freeGinBtreeStack(gdi->stack);
			pfree(gdi);
			entry->isFinished = FALSE;
		}
		else if (GinGetNPosting(itup) > 0)
		{
			entry->nlist = GinGetNPosting(itup);
			entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
			memcpy(entry->list, GinGetPosting(itup),
				   sizeof(ItemPointerData) * entry->nlist);
			entry->isFinished = FALSE;
		}
	}

	if (needUnlock)
		LockBuffer(stackEntry->buffer, GIN_UNLOCK);
	freeGinBtreeStack(stackEntry);
}