/*
 * Forms union of oldtup and addtup, if union == oldtup then return NULL
 */
IndexTuple
gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
{
    bool        neednew = FALSE;
    GISTENTRY   oldentries[INDEX_MAX_KEYS],
                addentries[INDEX_MAX_KEYS];
    bool        oldisnull[INDEX_MAX_KEYS],
                addisnull[INDEX_MAX_KEYS];
    IndexTuple  newtup = NULL;
    int         i;

    if (GistTupleIsInvalid(oldtup) || GistTupleIsInvalid(addtup))
        return gist_form_invalid_tuple(ItemPointerGetBlockNumber(&(oldtup->t_tid)));

    gistDeCompressAtt(giststate, r, oldtup, NULL,
                      (OffsetNumber) 0, oldentries, oldisnull);

    gistDeCompressAtt(giststate, r, addtup, NULL,
                      (OffsetNumber) 0, addentries, addisnull);

    for (i = 0; i < r->rd_att->natts; i++)
    {
        gistMakeUnionKey(giststate, i,
                         oldentries + i, oldisnull[i],
                         addentries + i, addisnull[i],
                         attrS + i, isnullS + i);

        if (neednew)
            /* we already need new key, so we can skip check */
            continue;

        if (isnullS[i])
            /* union of key may be NULL if and only if both keys are NULL */
            continue;

        if (!addisnull[i])
        {
            if (oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i]) == false)
                neednew = true;
        }
    }

    if (neednew)
    {
        /* need to update key */
        newtup = gistFormTuple(giststate, r, attrS, isnullS, false);
        newtup->t_tid = oldtup->t_tid;
    }

    return newtup;
}
/*
 * Forms union of oldtup and addtup, if union == oldtup then return NULL
 */
IndexTuple
gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
{
    bool        neednew = false;
    GISTENTRY   oldentries[INDEX_MAX_KEYS],
                addentries[INDEX_MAX_KEYS];
    bool        oldisnull[INDEX_MAX_KEYS],
                addisnull[INDEX_MAX_KEYS];
    Datum       attr[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    IndexTuple  newtup = NULL;
    int         i;

    gistDeCompressAtt(giststate, r, oldtup, NULL,
                      (OffsetNumber) 0, oldentries, oldisnull);

    gistDeCompressAtt(giststate, r, addtup, NULL,
                      (OffsetNumber) 0, addentries, addisnull);

    for (i = 0; i < r->rd_att->natts; i++)
    {
        gistMakeUnionKey(giststate, i,
                         oldentries + i, oldisnull[i],
                         addentries + i, addisnull[i],
                         attr + i, isnull + i);

        if (neednew)
            /* we already need new key, so we can skip check */
            continue;

        if (isnull[i])
            /* union of key may be NULL if and only if both keys are NULL */
            continue;

        if (!addisnull[i])
        {
            if (oldisnull[i] ||
                !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
                neednew = true;
        }
    }

    if (neednew)
    {
        /* need to update key */
        newtup = gistFormTuple(giststate, r, attr, isnull, false);
        newtup->t_tid = oldtup->t_tid;
    }

    return newtup;
}
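/*
 * The following is a minimal standalone sketch (not PostgreSQL code) of the
 * contract above: form the union of the old downlink key and the new key,
 * and report "no change" (NULL) when the old key already covers the union,
 * so the caller can skip rewriting the parent entry.  A 1-D integer interval
 * stands in for a GiST key; all names below are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct Interval
{
    int         lo;
    int         hi;
} Interval;

/* union of two interval keys, analogous to the GiST union support function */
static Interval
interval_union(Interval a, Interval b)
{
    Interval    u;

    u.lo = (a.lo < b.lo) ? a.lo : b.lo;
    u.hi = (a.hi > b.hi) ? a.hi : b.hi;
    return u;
}

/* analogous to gistgetadjusted: NULL means the old downlink needs no change */
static Interval *
get_adjusted(Interval oldkey, Interval addkey)
{
    Interval    u = interval_union(oldkey, addkey);
    Interval   *newkey;

    if (u.lo == oldkey.lo && u.hi == oldkey.hi)
        return NULL;            /* old downlink already covers the new key */

    newkey = malloc(sizeof(Interval));
    *newkey = u;
    return newkey;
}

int
main(void)
{
    Interval    parent = {0, 100};
    Interval    covered = {10, 20};
    Interval    outside = {90, 150};

    printf("covered -> %s\n", get_adjusted(parent, covered) ? "update" : "keep");
    printf("outside -> %s\n", get_adjusted(parent, outside) ? "update" : "keep");
    return 0;
}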
/*
 * Place a single tuple into either the left or right side of the split,
 * whichever side's union key it enlarges less: the first column, starting at
 * attno, where the penalties differ decides, and ties on all columns go left.
 */
static void
placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
         IndexTuple itup, OffsetNumber off, int attno)
{
    GISTENTRY   identry[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    bool        toLeft = true;

    gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
                      identry, isnull);

    for (; attno < giststate->tupdesc->natts; attno++)
    {
        float       lpenalty,
                    rpenalty;
        GISTENTRY   entry;

        gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
        lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
                               identry + attno, isnull[attno]);
        gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
        rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
                               identry + attno, isnull[attno]);

        if (lpenalty != rpenalty)
        {
            if (lpenalty > rpenalty)
                toLeft = false;
            break;
        }
    }

    if (toLeft)
        v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
    else
        v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
}
/*
 * find entry with lowest penalty
 */
OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it,   /* it has compressed entry */
           GISTSTATE *giststate)
{
    OffsetNumber maxoff;
    OffsetNumber i;
    OffsetNumber which;
    float       sum_grow,
                which_grow[INDEX_MAX_KEYS];
    GISTENTRY   entry,
                identry[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];

    maxoff = PageGetMaxOffsetNumber(p);
    *which_grow = -1.0;
    which = InvalidOffsetNumber;
    sum_grow = 1;
    gistDeCompressAtt(giststate, r, it, NULL, (OffsetNumber) 0,
                      identry, isnull);

    Assert(maxoff >= FirstOffsetNumber);
    Assert(!GistPageIsLeaf(p));

    for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
    {
        int         j;
        IndexTuple  itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));

        if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup))
        {
            ereport(LOG,
                    (errmsg("index \"%s\" needs VACUUM or REINDEX to finish crash recovery",
                            RelationGetRelationName(r))));
            continue;
        }

        sum_grow = 0;
        for (j = 0; j < r->rd_att->natts; j++)
        {
            Datum       datum;
            float       usize;
            bool        IsNull;

            datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
            gistdentryinit(giststate, j, &entry, datum, r, p, i, FALSE, IsNull);
            usize = gistpenalty(giststate, j, &entry, IsNull, &identry[j], isnull[j]);

            if (which_grow[j] < 0 || usize < which_grow[j])
            {
                which = i;
                which_grow[j] = usize;
                if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
                    which_grow[j + 1] = -1;
                sum_grow += which_grow[j];
            }
            else if (which_grow[j] == usize)
                sum_grow += usize;
            else
            {
                sum_grow = 1;
                break;
            }
        }
    }

    if (which == InvalidOffsetNumber)
        which = FirstOffsetNumber;

    return which;
}
/*
 * Search an upper index page for the entry with lowest penalty for insertion
 * of the new index key contained in "it".
 *
 * Returns the index of the page entry to insert into.
 */
OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it,   /* it has compressed entry */
           GISTSTATE *giststate)
{
    OffsetNumber result;
    OffsetNumber maxoff;
    OffsetNumber i;
    float       best_penalty[INDEX_MAX_KEYS];
    GISTENTRY   entry,
                identry[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    int         keep_current_best;

    Assert(!GistPageIsLeaf(p));

    gistDeCompressAtt(giststate, r,
                      it, NULL, (OffsetNumber) 0,
                      identry, isnull);

    /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
    result = FirstOffsetNumber;

    /*
     * The index may have multiple columns, and there's a penalty value for
     * each column.  The penalty associated with a column that appears
     * earlier in the index definition is strictly more important than the
     * penalty of a column that appears later in the index definition.
     *
     * best_penalty[j] is the best penalty we have seen so far for column j,
     * or -1 when we haven't yet examined column j.  Array entries to the
     * right of the first -1 are undefined.
     */
    best_penalty[0] = -1;

    /*
     * If we find a tuple that's exactly as good as the currently best one,
     * we could use either one.  When inserting a lot of tuples with the same
     * or similar keys, it's preferable to descend down the same path when
     * possible, as that's more cache-friendly.  On the other hand, if all
     * inserts land on the same leaf page after a split, we're never going to
     * insert anything to the other half of the split, and will end up using
     * only 50% of the available space.  Distributing the inserts evenly
     * would lead to better space usage, but that hurts cache-locality during
     * insertion.  To get the best of both worlds, when we find a tuple
     * that's exactly as good as the previous best, choose randomly whether
     * to stick to the old best, or use the new one.  Once we decide to stick
     * to the old best, we keep sticking to it for any subsequent equally
     * good tuples we might find.  This favors tuples with low offsets, but
     * still allows some inserts to go to other equally-good subtrees.
     *
     * keep_current_best is -1 if we haven't yet had to make a random choice
     * whether to keep the current best tuple.  If we have done so, and
     * decided to keep it, keep_current_best is 1; if we've decided to
     * replace, keep_current_best is 0.  (This state will be reset to -1 as
     * soon as we've made the replacement, but sometimes we make the choice
     * in advance of actually finding a replacement best tuple.)
     */
    keep_current_best = -1;

    /*
     * Loop over tuples on page.
     */
    maxoff = PageGetMaxOffsetNumber(p);
    Assert(maxoff >= FirstOffsetNumber);

    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    {
        IndexTuple  itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
        bool        zero_penalty;
        int         j;

        zero_penalty = true;

        /* Loop over index attributes. */
        for (j = 0; j < r->rd_att->natts; j++)
        {
            Datum       datum;
            float       usize;
            bool        IsNull;

            /* Compute penalty for this column. */
            datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
            gistdentryinit(giststate, j, &entry, datum, r, p, i,
                           false, IsNull);
            usize = gistpenalty(giststate, j, &entry, IsNull,
                                &identry[j], isnull[j]);
            if (usize > 0)
                zero_penalty = false;

            if (best_penalty[j] < 0 || usize < best_penalty[j])
            {
                /*
                 * New best penalty for column.  Tentatively select this
                 * tuple as the target, and record the best penalty.  Then
                 * reset the next column's penalty to "unknown" (and
                 * indirectly, the same for all the ones to its right).  This
                 * will force us to adopt this tuple's penalty values as the
                 * best for all the remaining columns during subsequent loop
                 * iterations.
                 */
                result = i;
                best_penalty[j] = usize;

                if (j < r->rd_att->natts - 1)
                    best_penalty[j + 1] = -1;

                /* we have new best, so reset keep-it decision */
                keep_current_best = -1;
            }
            else if (best_penalty[j] == usize)
            {
                /*
                 * The current tuple is exactly as good for this column as
                 * the best tuple seen so far.  The next iteration of this
                 * loop will compare the next column.
                 */
            }
            else
            {
                /*
                 * The current tuple is worse for this column than the best
                 * tuple seen so far.  Skip the remaining columns and move on
                 * to the next tuple, if any.
                 */
                zero_penalty = false;   /* so outer loop won't exit */
                break;
            }
        }

        /*
         * If we looped past the last column, and did not update "result",
         * then this tuple is exactly as good as the prior best tuple.
         */
        if (j == r->rd_att->natts && result != i)
        {
            if (keep_current_best == -1)
            {
                /* we didn't make the random choice yet for this old best */
                keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
            }
            if (keep_current_best == 0)
            {
                /* we choose to use the new tuple */
                result = i;
                /* choose again if there are even more exactly-as-good ones */
                keep_current_best = -1;
            }
        }

        /*
         * If we find a tuple with zero penalty for all columns, and we've
         * decided we don't want to search for another tuple with equal
         * penalty, there's no need to examine remaining tuples; just break
         * out of the loop and return it.
         */
        if (zero_penalty)
        {
            if (keep_current_best == -1)
            {
                /* we didn't make the random choice yet for this old best */
                keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
            }
            if (keep_current_best == 1)
                break;
        }
    }

    return result;
}
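/*
 * A minimal standalone sketch (not PostgreSQL code) of the two ideas in the
 * comments above: penalties are compared column by column with earlier
 * columns strictly more important, and an exact tie triggers a one-time
 * random decision whether to keep the incumbent best.  The candidate data
 * and helper names are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NCOLS 2

static int
choose_candidate(float penalties[][NCOLS], int ncand)
{
    int         best = 0;
    int         keep_current_best = -1; /* -1: undecided, 1: keep, 0: replace */
    int         i,
                j;

    for (i = 1; i < ncand; i++)
    {
        int         cmp = 0;    /* -1: worse, 0: equal, +1: better than best */

        /* earlier columns are strictly more important than later ones */
        for (j = 0; j < NCOLS; j++)
        {
            if (penalties[i][j] < penalties[best][j])
            {
                cmp = 1;
                break;
            }
            if (penalties[i][j] > penalties[best][j])
            {
                cmp = -1;
                break;
            }
        }

        if (cmp > 0)
        {
            best = i;
            keep_current_best = -1; /* new best, reset keep-it decision */
        }
        else if (cmp == 0)
        {
            if (keep_current_best == -1)
                keep_current_best = rand() % 2; /* decide once per incumbent */
            if (keep_current_best == 0)
            {
                best = i;
                keep_current_best = -1;
            }
        }
    }
    return best;
}

int
main(void)
{
    float       penalties[4][NCOLS] = {
        {2.0f, 0.0f}, {1.0f, 5.0f}, {1.0f, 3.0f}, {1.0f, 3.0f}
    };

    srand((unsigned) time(NULL));
    /* candidates 2 and 3 tie lexicographically; either may be chosen */
    printf("chose candidate %d\n", choose_candidate(penalties, 4));
    return 0;
}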
/*
 * At page split, distribute tuples from the buffer of the split page to
 * new buffers for the created page halves. This also adjusts the downlinks
 * in 'splitinfo' to include the tuples in the buffers.
 */
void
gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
                                Relation r, int level,
                                Buffer buffer, List *splitinfo)
{
    RelocationBufferInfo *relocationBuffersInfos;
    bool        found;
    GISTNodeBuffer *nodeBuffer;
    BlockNumber blocknum;
    IndexTuple  itup;
    int         splitPagesCount = 0,
                i;
    GISTENTRY   entry[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    GISTNodeBuffer oldBuf;
    ListCell   *lc;

    /* If the split page doesn't have buffers, we have nothing to do. */
    if (!LEVEL_HAS_BUFFERS(level, gfbb))
        return;

    /*
     * Get the node buffer of the split page.
     */
    blocknum = BufferGetBlockNumber(buffer);
    nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
                             HASH_FIND, &found);
    if (!found)
    {
        /* The page has no buffer, so we have nothing to do. */
        return;
    }

    /*
     * Make a copy of the old buffer, as we're going to reuse it as the
     * buffer for the new left page, which is on the same block as the old
     * page.  That's not true for the root page, but that's fine because we
     * never have a buffer on the root page anyway.  The original algorithm
     * as described by Arge et al did have one, but it's of no use, as you
     * might as well read the tuples straight from the heap instead of the
     * root buffer.
     */
    Assert(blocknum != GIST_ROOT_BLKNO);
    memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
    oldBuf.isTemp = true;

    /* Reset the old buffer, used for the new left page from now on */
    nodeBuffer->blocksCount = 0;
    nodeBuffer->pageBuffer = NULL;
    nodeBuffer->pageBlocknum = InvalidBlockNumber;

    /*
     * Allocate memory for information about relocation buffers.
     */
    splitPagesCount = list_length(splitinfo);
    relocationBuffersInfos =
        (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
                                        splitPagesCount);

    /*
     * Fill relocation buffers information for node buffers of pages produced
     * by split.
     */
    i = 0;
    foreach(lc, splitinfo)
    {
        GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
        GISTNodeBuffer *newNodeBuffer;

        /* Decompress parent index tuple of node buffer page. */
        gistDeCompressAtt(giststate, r,
                          si->downlink, NULL, (OffsetNumber) 0,
                          relocationBuffersInfos[i].entry,
                          relocationBuffersInfos[i].isnull);

        /*
         * Create a node buffer for the page. The leftmost half is on the
         * same block as the old page before split, so for the leftmost half
         * this will return the original buffer. The tuples on the original
         * buffer were relinked to the temporary buffer, so the original one
         * is now empty.
         */
        newNodeBuffer = gistGetNodeBuffer(gfbb, giststate,
                                          BufferGetBlockNumber(si->buf),
                                          level);

        relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
        relocationBuffersInfos[i].splitinfo = si;

        i++;
    }
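/*
 * The excerpt above stops before the loop that actually moves tuples out of
 * the temporary buffer.  As a rough standalone sketch (not PostgreSQL code)
 * of the overall pattern its header comment describes: copy the old buffer
 * aside, reset it in place so the left half can reuse it, then route each
 * buffered item to the half whose downlink key it enlarges least.  All types
 * and names here are invented for illustration.
 */
#include <stdio.h>
#include <string.h>

#define MAXITEMS 8

typedef struct ToyBuffer
{
    int         items[MAXITEMS];
    int         count;
} ToyBuffer;

/* penalty: how far 'value' falls outside [lo, hi] */
static int
penalty(int lo, int hi, int value)
{
    if (value < lo)
        return lo - value;
    if (value > hi)
        return value - hi;
    return 0;
}

int
main(void)
{
    ToyBuffer   oldbuf = {{5, 42, 77, 18, 60}, 5};
    ToyBuffer   rightbuf = {{0}, 0};
    ToyBuffer   tempbuf;
    ToyBuffer  *halves[2];
    int         lo[2] = {0, 50};    /* downlink key ranges of the two halves */
    int         hi[2] = {49, 100};
    int         i;

    /* copy the old buffer aside, then reset it for reuse by the left half */
    memcpy(&tempbuf, &oldbuf, sizeof(ToyBuffer));
    oldbuf.count = 0;
    halves[0] = &oldbuf;        /* left half reuses the old buffer */
    halves[1] = &rightbuf;      /* right half gets a fresh buffer */

    /* relocate every buffered item to the half with the smaller penalty */
    for (i = 0; i < tempbuf.count; i++)
    {
        int         item = tempbuf.items[i];
        int         target = (penalty(lo[0], hi[0], item) <=
                              penalty(lo[1], hi[1], item)) ? 0 : 1;

        halves[target]->items[halves[target]->count++] = item;
    }

    for (i = 0; i < 2; i++)
        printf("half %d got %d items\n", i, halves[i]->count);
    return 0;
}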
/*
 * Forms union of oldtup and addtup, if union == oldtup then return NULL
 */
static IndexTuple
gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
{
    GistEntryVector *evec;
    Datum       datum;
    int         datumsize;
    bool        result,
                neednew = false;
    char        isnull[INDEX_MAX_KEYS],
                whatfree[INDEX_MAX_KEYS];
    Datum       attr[INDEX_MAX_KEYS];
    GISTENTRY   centry[INDEX_MAX_KEYS],
                oldatt[INDEX_MAX_KEYS],
                addatt[INDEX_MAX_KEYS],
               *ev0p,
               *ev1p;
    bool        olddec[INDEX_MAX_KEYS],
                adddec[INDEX_MAX_KEYS];
    bool        oldisnull[INDEX_MAX_KEYS],
                addisnull[INDEX_MAX_KEYS];
    IndexTuple  newtup = NULL;
    int         j;

    evec = palloc(2 * sizeof(GISTENTRY) + GEVHDRSZ);
    evec->n = 2;
    ev0p = &(evec->vector[0]);
    ev1p = &(evec->vector[1]);

    gistDeCompressAtt(giststate, r, oldtup, NULL,
                      (OffsetNumber) 0, oldatt, olddec, oldisnull);

    gistDeCompressAtt(giststate, r, addtup, NULL,
                      (OffsetNumber) 0, addatt, adddec, addisnull);

    for (j = 0; j < r->rd_att->natts; j++)
    {
        if (oldisnull[j] && addisnull[j])
        {
            isnull[j] = 'n';
            attr[j] = (Datum) 0;
            whatfree[j] = FALSE;
        }
        else
        {
            FILLEV(
                   oldisnull[j], oldatt[j].key, oldatt[j].bytes,
                   addisnull[j], addatt[j].key, addatt[j].bytes
                );

            datum = FunctionCall2(&giststate->unionFn[j],
                                  PointerGetDatum(evec),
                                  PointerGetDatum(&datumsize));

            if (oldisnull[j] || addisnull[j])
            {
                if (oldisnull[j])
                    neednew = true;
            }
            else
            {
                FunctionCall3(&giststate->equalFn[j],
                              ev0p->key,
                              datum,
                              PointerGetDatum(&result));

                if (!result)
                    neednew = true;
            }

            if (olddec[j])
                pfree(DatumGetPointer(oldatt[j].key));
            if (adddec[j])
                pfree(DatumGetPointer(addatt[j].key));

            gistcentryinit(giststate, j, &centry[j], datum,
                           NULL, NULL, (OffsetNumber) 0,
                           datumsize, FALSE, FALSE);

            isnull[j] = ' ';
            attr[j] = centry[j].key;
            if (!isAttByVal(giststate, j))
            {
                whatfree[j] = TRUE;
                if (centry[j].key != datum)
                    pfree(DatumGetPointer(datum));
            }
            else
                whatfree[j] = FALSE;
        }
    }
    pfree(evec);

    if (neednew)
    {
        /* need to update key */
        newtup = (IndexTuple) index_formtuple(giststate->tupdesc, attr, isnull);
        newtup->t_tid = oldtup->t_tid;
    }

    for (j = 0; j < r->rd_att->natts; j++)
        if (whatfree[j])
            pfree(DatumGetPointer(attr[j]));

    return newtup;
}
/*
** find entry with lowest penalty
*/
static OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it,   /* it has compressed entry */
           GISTSTATE *giststate)
{
    OffsetNumber maxoff;
    OffsetNumber i;
    Datum       datum;
    float       usize;
    OffsetNumber which;
    float       sum_grow,
                which_grow[INDEX_MAX_KEYS];
    GISTENTRY   entry,
                identry[INDEX_MAX_KEYS];
    bool        IsNull,
                decompvec[INDEX_MAX_KEYS],
                isnull[INDEX_MAX_KEYS];
    int         j;

    maxoff = PageGetMaxOffsetNumber(p);
    *which_grow = -1.0;
    which = -1;
    sum_grow = 1;
    gistDeCompressAtt(giststate, r,
                      it, NULL, (OffsetNumber) 0,
                      identry, decompvec, isnull);

    for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
    {
        IndexTuple  itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));

        sum_grow = 0;
        for (j = 0; j < r->rd_att->natts; j++)
        {
            datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
            gistdentryinit(giststate, j, &entry, datum, r, p, i,
                           ATTSIZE(datum, giststate->tupdesc, j + 1, IsNull),
                           FALSE, IsNull);
            gistpenalty(giststate, j, &entry, IsNull,
                        &identry[j], isnull[j], &usize);

            if ((!isAttByVal(giststate, j)) && entry.key != datum)
                pfree(DatumGetPointer(entry.key));

            if (which_grow[j] < 0 || usize < which_grow[j])
            {
                which = i;
                which_grow[j] = usize;
                if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
                    which_grow[j + 1] = -1;
                sum_grow += which_grow[j];
            }
            else if (which_grow[j] == usize)
                sum_grow += usize;
            else
            {
                sum_grow = 1;
                break;
            }
        }
    }
    gistFreeAtt(r, identry, decompvec);
    return which;
}
/*
 * Insert equivalent tuples to left or right page
 * with minimal penalty
 */
static void
gistadjsubkey(Relation r,
              IndexTuple *itup, /* contains compressed entry */
              int *len,
              GIST_SPLITVEC *v,
              GISTSTATE *giststate
)
{
    int         curlen;
    OffsetNumber *curwpos;
    bool        decfree[INDEX_MAX_KEYS];
    GISTENTRY   entry,
                identry[INDEX_MAX_KEYS],
               *ev0p,
               *ev1p;
    float       lpenalty,
                rpenalty;
    GistEntryVector *evec;
    int         datumsize;
    bool        isnull[INDEX_MAX_KEYS];
    int         i,
                j;
    Datum       datum;

    /* clear vectors */
    curlen = v->spl_nleft;
    curwpos = v->spl_left;
    for (i = 0; i < v->spl_nleft; i++)
        if (v->spl_idgrp[v->spl_left[i]] == 0)
        {
            *curwpos = v->spl_left[i];
            curwpos++;
        }
        else
            curlen--;
    v->spl_nleft = curlen;

    curlen = v->spl_nright;
    curwpos = v->spl_right;
    for (i = 0; i < v->spl_nright; i++)
        if (v->spl_idgrp[v->spl_right[i]] == 0)
        {
            *curwpos = v->spl_right[i];
            curwpos++;
        }
        else
            curlen--;
    v->spl_nright = curlen;

    evec = palloc(2 * sizeof(GISTENTRY) + GEVHDRSZ);
    evec->n = 2;
    ev0p = &(evec->vector[0]);
    ev1p = &(evec->vector[1]);

    /* add equivalent tuple */
    for (i = 0; i < *len; i++)
    {
        if (v->spl_idgrp[i + 1] == 0)   /* already inserted */
            continue;
        gistDeCompressAtt(giststate, r, itup[i], NULL, (OffsetNumber) 0,
                          identry, decfree, isnull);

        v->spl_ngrp[v->spl_idgrp[i + 1]]--;
        if (v->spl_ngrp[v->spl_idgrp[i + 1]] == 0 &&
            (v->spl_grpflag[v->spl_idgrp[i + 1]] & BOTH_ADDED) != BOTH_ADDED)
        {
            /* force last in group */
            rpenalty = 1.0;
            lpenalty = (v->spl_grpflag[v->spl_idgrp[i + 1]] & LEFT_ADDED) ? 2.0 : 0.0;
        }
        else
        {
            /* where? */
            for (j = 1; j < r->rd_att->natts; j++)
            {
                gistentryinit(entry, v->spl_lattr[j], r, NULL,
                              (OffsetNumber) 0, v->spl_lattrsize[j], FALSE);
                gistpenalty(giststate, j, &entry, v->spl_lisnull[j],
                            &identry[j], isnull[j], &lpenalty);

                gistentryinit(entry, v->spl_rattr[j], r, NULL,
                              (OffsetNumber) 0, v->spl_rattrsize[j], FALSE);
                gistpenalty(giststate, j, &entry, v->spl_risnull[j],
                            &identry[j], isnull[j], &rpenalty);

                if (lpenalty != rpenalty)
                    break;
            }
        }

        /* add */
        if (lpenalty < rpenalty)
        {
            v->spl_grpflag[v->spl_idgrp[i + 1]] |= LEFT_ADDED;
            v->spl_left[v->spl_nleft] = i + 1;
            v->spl_nleft++;
            for (j = 1; j < r->rd_att->natts; j++)
            {
                if (isnull[j] && v->spl_lisnull[j])
                {
                    v->spl_lattr[j] = (Datum) 0;
                    v->spl_lattrsize[j] = 0;
                }
                else
                {
                    FILLEV(
                           v->spl_lisnull[j], v->spl_lattr[j], v->spl_lattrsize[j],
                           isnull[j], identry[j].key, identry[j].bytes
                        );

                    datum = FunctionCall2(&giststate->unionFn[j],
                                          PointerGetDatum(evec),
                                          PointerGetDatum(&datumsize));

                    if ((!isAttByVal(giststate, j)) && !v->spl_lisnull[j])
                        pfree(DatumGetPointer(v->spl_lattr[j]));
                    v->spl_lattr[j] = datum;
                    v->spl_lattrsize[j] = datumsize;
                    v->spl_lisnull[j] = false;
                }
            }
        }
        else
        {
            v->spl_grpflag[v->spl_idgrp[i + 1]] |= RIGHT_ADDED;
            v->spl_right[v->spl_nright] = i + 1;
            v->spl_nright++;
            for (j = 1; j < r->rd_att->natts; j++)
            {
                if (isnull[j] && v->spl_risnull[j])
                {
                    v->spl_rattr[j] = (Datum) 0;
                    v->spl_rattrsize[j] = 0;
                }
                else
                {
                    FILLEV(
                           v->spl_risnull[j], v->spl_rattr[j], v->spl_rattrsize[j],
                           isnull[j], identry[j].key, identry[j].bytes
                        );

                    datum = FunctionCall2(&giststate->unionFn[j],
                                          PointerGetDatum(evec),
                                          PointerGetDatum(&datumsize));

                    if ((!isAttByVal(giststate, j)) && !v->spl_risnull[j])
                        pfree(DatumGetPointer(v->spl_rattr[j]));
                    v->spl_rattr[j] = datum;
                    v->spl_rattrsize[j] = datumsize;
                    v->spl_risnull[j] = false;
                }
            }
        }
        gistFreeAtt(r, identry, decfree);
    }
    pfree(evec);
}