/*
 * Make unions of keys in IndexTuple vector (one union datum per index column).
 * Union Datums are returned into the attr/isnull arrays.
 * Resulting Datums aren't compressed.
 */
void
gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
				   Datum *attr, bool *isnull)
{
	int			i;
	GistEntryVector *evec;
	int			attrsize;

	/* room for len entries plus the possible duplicate of a lone entry */
	evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);

	for (i = 0; i < giststate->tupdesc->natts; i++)
	{
		int			j;

		/* Collect non-null datums for this column */
		evec->n = 0;
		for (j = 0; j < len; j++)
		{
			Datum		datum;
			bool		IsNull;

			datum = index_getattr(itvec[j], i + 1, giststate->tupdesc,
								  &IsNull);
			if (IsNull)
				continue;

			/* decompress into the next slot of the entry vector */
			gistdentryinit(giststate, i, evec->vector + evec->n,
						   datum,
						   NULL, NULL, (OffsetNumber) 0,
						   false, IsNull);
			evec->n++;
		}

		/* If this column was all NULLs, the union is NULL */
		if (evec->n == 0)
		{
			attr[i] = (Datum) 0;
			isnull[i] = true;
		}
		else
		{
			if (evec->n == 1)
			{
				/* unionFn may expect at least two inputs */
				evec->n = 2;
				evec->vector[1] = evec->vector[0];
			}

			/* Make union and store in attr array */
			attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
										giststate->supportCollation[i],
										PointerGetDatum(evec),
										PointerGetDatum(&attrsize));
			isnull[i] = false;
		}
	}
}
/*
 * Decompress all keys in tuple
 *
 * Fills attdata[]/isnull[] with one decompressed GISTENTRY (and its null
 * flag) per index column of the given tuple.
 */
void
gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
				  OffsetNumber o, GISTENTRY *attdata, bool *isnull)
{
	int			attno;
	int			natts = r->rd_att->natts;

	for (attno = 0; attno < natts; attno++)
	{
		Datum		datum;

		/* attribute numbers are 1-based in index_getattr */
		datum = index_getattr(tuple, attno + 1, giststate->tupdesc,
							  &isnull[attno]);
		gistdentryinit(giststate, attno, &attdata[attno], datum,
					   r, p, o, FALSE, isnull[attno]);
	}
}
/*
 * Take a compressed entry, and install it on a page. Since we now know
 * where the entry will live, we decompress it and recompress it using
 * that knowledge (some compression routines may want to fish around
 * on the page, for example, or do something special for leaf nodes.)
 *
 * On success returns the offset the item was placed at; elog(ERROR)s if
 * PageAddItem fails.  *dentry and *newtup are output parameters.
 *
 * NOTE(review): the "size" parameter is not referenced in this body;
 * IndexTupleSize(*newtup) is used instead — presumably kept for interface
 * compatibility; confirm against callers.
 */
static OffsetNumber
gistPageAddItem(GISTSTATE *giststate,
				Relation r,
				Page page,
				Item item,
				Size size,
				OffsetNumber offsetNumber,
				ItemIdFlags flags,
				GISTENTRY *dentry,
				IndexTuple *newtup)
{
	GISTENTRY	tmpcentry;
	IndexTuple	itup = (IndexTuple) item;
	OffsetNumber retval;
	Datum		datum;
	bool		IsNull;

	/*
	 * recompress the item given that we now know the exact page and
	 * offset for insertion
	 */
	datum = index_getattr(itup, 1, r->rd_att, &IsNull);
	gistdentryinit(giststate, 0, dentry, datum,
				   (Relation) 0, (Page) 0,
				   (OffsetNumber) InvalidOffsetNumber,
				   ATTSIZE(datum, r, 1, IsNull),
				   FALSE, IsNull);
	gistcentryinit(giststate, 0, &tmpcentry, dentry->key, r, page,
				   offsetNumber, dentry->bytes, FALSE);
	*newtup = gist_tuple_replacekey(r, tmpcentry, itup);
	retval = PageAddItem(page, (Item) *newtup, IndexTupleSize(*newtup),
						 offsetNumber, flags);
	if (retval == InvalidOffsetNumber)
		elog(ERROR, "failed to add index item to \"%s\"",
			 RelationGetRelationName(r));
	/* be tidy: free the recompressed key only if it is a fresh allocation */
	if (DatumGetPointer(tmpcentry.key) != NULL &&
		tmpcentry.key != dentry->key &&
		tmpcentry.key != datum)
		pfree(DatumGetPointer(tmpcentry.key));
	return (retval);
}
/*
 * Decompress all keys of the tuple into attdata[]/isnull[], and record in
 * decompvec[] which decompressed keys are fresh allocations the caller
 * must pfree later.
 */
static void
gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
				  OffsetNumber o, GISTENTRY attdata[], bool decompvec[],
				  bool isnull[])
{
	int			attno;

	for (attno = 0; attno < r->rd_att->natts; attno++)
	{
		Datum		datum = index_getattr(tuple, attno + 1,
										  giststate->tupdesc,
										  &isnull[attno]);

		gistdentryinit(giststate, attno, &attdata[attno], datum,
					   r, p, o,
					   ATTSIZE(datum, giststate->tupdesc, attno + 1,
							   isnull[attno]),
					   FALSE, isnull[attno]);

		/*
		 * A by-value key, a NULL, or a key identical to the stored datum
		 * needs no separate free; only a newly palloc'd decompressed key
		 * does.
		 */
		decompvec[attno] = (!isAttByVal(giststate, attno) &&
							!isnull[attno] &&
							attdata[attno].key != datum) ? TRUE : FALSE;
	}
}
/*
 * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
 *
 * The index tuple might represent either a heap tuple or a lower index page,
 * depending on whether the containing page is a leaf page or not.
 *
 * On success return for a heap tuple, *recheck_p is set to indicate
 * whether recheck is needed. We recheck if any of the consistent() functions
 * request it. recheck is not interesting when examining a non-leaf entry,
 * since we must visit the lower index page if there's any doubt.
 *
 * If we are doing an ordered scan, so->distances[] is filled with distance
 * data from the distance() functions before returning success.
 *
 * We must decompress the key in the IndexTuple before passing it to the
 * sk_funcs (which actually are the opclass Consistent or Distance methods).
 *
 * Note that this function is always invoked in a short-lived memory context,
 * so we don't need to worry about cleaning up allocated memory, either here
 * or in the implementation of any Consistent or Distance methods.
 */
static bool
gistindex_keytest(IndexScanDesc scan,
				  IndexTuple tuple,
				  Page page,
				  OffsetNumber offset,
				  bool *recheck_p)
{
	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
	GISTSTATE  *giststate = so->giststate;
	ScanKey		key = scan->keyData;
	int			keySize = scan->numberOfKeys;
	double	   *distance_p;
	Relation	r = scan->indexRelation;

	*recheck_p = false;

	/*
	 * If it's a leftover invalid tuple from pre-9.1, treat it as a match with
	 * minimum possible distances. This means we'll always follow it to the
	 * referenced page.
	 *
	 * GPDB: the virtual TIDs created for AO tables use the full range of
	 * offset numbers from 0 to 65535. So a tuple on leaf page that looks like
	 * an invalid tuple, is actually ok.
	 */
	if (!GistPageIsLeaf(page) && GistTupleIsInvalid(tuple))
	{
		int			i;

		for (i = 0; i < scan->numberOfOrderBys; i++)
			so->distances[i] = -get_float8_infinity();
		return true;
	}

	/* Check whether it matches according to the Consistent functions */
	while (keySize > 0)
	{
		Datum		datum;
		bool		isNull;

		datum = index_getattr(tuple,
							  key->sk_attno,
							  giststate->tupdesc,
							  &isNull);

		if (key->sk_flags & SK_ISNULL)
		{
			/*
			 * On non-leaf page we can't conclude that child hasn't NULL
			 * values because of assumption in GiST: union (VAL, NULL) is VAL.
			 * But if on non-leaf page key IS NULL, then all children are
			 * NULL.
			 */
			if (key->sk_flags & SK_SEARCHNULL)
			{
				if (GistPageIsLeaf(page) && !isNull)
					return false;
			}
			else
			{
				Assert(key->sk_flags & SK_SEARCHNOTNULL);
				if (isNull)
					return false;
			}
		}
		else if (isNull)
		{
			return false;
		}
		else
		{
			Datum		test;
			bool		recheck;
			GISTENTRY	de;

			gistdentryinit(giststate, key->sk_attno - 1, &de,
						   datum, r, page, offset,
						   FALSE, isNull);

			/*
			 * Call the Consistent function to evaluate the test. The
			 * arguments are the index datum (as a GISTENTRY*), the comparison
			 * datum, the comparison operator's strategy number and subtype
			 * from pg_amop, and the recheck flag.
			 *
			 * (Presently there's no need to pass the subtype since it'll
			 * always be zero, but might as well pass it for possible future
			 * use.)
			 *
			 * We initialize the recheck flag to true (the safest assumption)
			 * in case the Consistent function forgets to set it.
			 */
			recheck = true;

			test = FunctionCall5Coll(&key->sk_func,
									 key->sk_collation,
									 PointerGetDatum(&de),
									 key->sk_argument,
									 Int32GetDatum(key->sk_strategy),
									 ObjectIdGetDatum(key->sk_subtype),
									 PointerGetDatum(&recheck));

			if (!DatumGetBool(test))
				return false;
			*recheck_p |= recheck;
		}

		key++;
		keySize--;
	}

	/* OK, it passes --- now let's compute the distances */
	key = scan->orderByData;
	distance_p = so->distances;
	keySize = scan->numberOfOrderBys;
	while (keySize > 0)
	{
		Datum		datum;
		bool		isNull;

		datum = index_getattr(tuple,
							  key->sk_attno,
							  giststate->tupdesc,
							  &isNull);

		if ((key->sk_flags & SK_ISNULL) || isNull)
		{
			/* Assume distance computes as null and sorts to the end */
			*distance_p = get_float8_infinity();
		}
		else
		{
			Datum		dist;
			GISTENTRY	de;

			gistdentryinit(giststate, key->sk_attno - 1, &de,
						   datum, r, page, offset,
						   FALSE, isNull);

			/*
			 * Call the Distance function to evaluate the distance. The
			 * arguments are the index datum (as a GISTENTRY*), the comparison
			 * datum, and the ordering operator's strategy number and subtype
			 * from pg_amop.
			 *
			 * (Presently there's no need to pass the subtype since it'll
			 * always be zero, but might as well pass it for possible future
			 * use.)
			 *
			 * Note that Distance functions don't get a recheck argument. We
			 * can't tolerate lossy distance calculations on leaf tuples;
			 * there is no opportunity to re-sort the tuples afterwards.
			 */
			dist = FunctionCall4Coll(&key->sk_func,
									 key->sk_collation,
									 PointerGetDatum(&de),
									 key->sk_argument,
									 Int32GetDatum(key->sk_strategy),
									 ObjectIdGetDatum(key->sk_subtype));

			*distance_p = DatumGetFloat8(dist);
		}

		key++;
		distance_p++;
		keySize--;
	}

	return true;
}
/*
 * find entry with lowest penalty
 *
 * Scans the entries of upper (non-leaf) page p and returns the offset of
 * the entry whose per-column penalties against the new key "it" are best;
 * falls back to FirstOffsetNumber if no entry was chosen.
 */
OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it,	/* it has compressed entry */
		   GISTSTATE *giststate)
{
	OffsetNumber maxoff;
	OffsetNumber i;
	OffsetNumber which;
	float		sum_grow,
				which_grow[INDEX_MAX_KEYS];
	GISTENTRY	entry,
				identry[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];

	maxoff = PageGetMaxOffsetNumber(p);
	/* -1 in which_grow[0] means "no penalty recorded yet" for column 0 */
	*which_grow = -1.0;
	which = InvalidOffsetNumber;
	/* nonzero sum_grow keeps the outer loop running */
	sum_grow = 1;
	gistDeCompressAtt(giststate, r,
					  it, NULL, (OffsetNumber) 0,
					  identry, isnull);

	Assert(maxoff >= FirstOffsetNumber);
	Assert(!GistPageIsLeaf(p));

	for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
	{
		int			j;
		IndexTuple	itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));

		/* skip tuples left invalid by an interrupted insert */
		if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup))
		{
			ereport(LOG,
					(errmsg("index \"%s\" needs VACUUM or REINDEX to finish crash recovery",
							RelationGetRelationName(r))));
			continue;
		}

		sum_grow = 0;
		for (j = 0; j < r->rd_att->natts; j++)
		{
			Datum		datum;
			float		usize;
			bool		IsNull;

			datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
			gistdentryinit(giststate, j, &entry, datum, r, p, i, FALSE, IsNull);
			usize = gistpenalty(giststate, j, &entry, IsNull,
								&identry[j], isnull[j]);

			if (which_grow[j] < 0 || usize < which_grow[j])
			{
				/* new best for this column; invalidate the next column */
				which = i;
				which_grow[j] = usize;
				if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
					which_grow[j + 1] = -1;
				sum_grow += which_grow[j];
			}
			else if (which_grow[j] == usize)
				sum_grow += usize;
			else
			{
				/* worse than current best: abandon this tuple */
				sum_grow = 1;
				break;
			}
		}
	}

	if (which == InvalidOffsetNumber)
		which = FirstOffsetNumber;

	return which;
}
/*
 * Make unions of keys in IndexTuple vector, for columns startkey and up.
 * The resulting union datums are stored back into attr[]/isnull[]; a
 * non-null incoming attr[i] is folded into the union as well.
 *
 * Returns FALSE if any input tuple is invalid (union with an invalid
 * tuple would itself be invalid); TRUE otherwise.
 */
bool
gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
				   Datum *attr, bool *isnull)
{
	int			i;
	GistEntryVector *evec;
	int			attrsize;

	evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);

	for (i = startkey; i < giststate->tupdesc->natts; i++)
	{
		int			j;

		evec->n = 0;
		/* seed the vector with the caller's existing key, if any */
		if (!isnull[i])
		{
			gistentryinit(evec->vector[evec->n], attr[i],
						  NULL, NULL, (OffsetNumber) 0,
						  FALSE);
			evec->n++;
		}

		for (j = 0; j < len; j++)
		{
			Datum		datum;
			bool		IsNull;

			if (GistTupleIsInvalid(itvec[j]))
				return FALSE;	/* signals that union with invalid tuple =>
								 * result is invalid */

			datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
			if (IsNull)
				continue;

			gistdentryinit(giststate, i,
						   evec->vector + evec->n,
						   datum,
						   NULL, NULL, (OffsetNumber) 0,
						   FALSE, IsNull);
			evec->n++;
		}

		/* If this tuple vector was all NULLs, the union is NULL */
		if (evec->n == 0)
		{
			attr[i] = (Datum) 0;
			isnull[i] = TRUE;
		}
		else
		{
			if (evec->n == 1)
			{
				/* duplicate the lone entry; unionFn may need two inputs */
				evec->n = 2;
				evec->vector[1] = evec->vector[0];
			}

			/* Make union and store in attr array */
			attr[i] = FunctionCall2(&giststate->unionFn[i],
									PointerGetDatum(evec),
									PointerGetDatum(&attrsize));
			isnull[i] = FALSE;
		}
	}

	return TRUE;
}
/*
 * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
 *
 * The index tuple might represent either a heap tuple or a lower index page,
 * depending on whether the containing page is a leaf page or not.
 *
 * On success return for a heap tuple, *recheck_p is set to indicate whether
 * the quals need to be rechecked. We recheck if any of the consistent()
 * functions request it. recheck is not interesting when examining a non-leaf
 * entry, since we must visit the lower index page if there's any doubt.
 * Similarly, *recheck_distances_p is set to indicate whether the distances
 * need to be rechecked, and it is also ignored for non-leaf entries.
 *
 * If we are doing an ordered scan, so->distances[] is filled with distance
 * data from the distance() functions before returning success.
 *
 * We must decompress the key in the IndexTuple before passing it to the
 * sk_funcs (which actually are the opclass Consistent or Distance methods).
 *
 * Note that this function is always invoked in a short-lived memory context,
 * so we don't need to worry about cleaning up allocated memory, either here
 * or in the implementation of any Consistent or Distance methods.
 */
static bool
gistindex_keytest(IndexScanDesc scan,
				  IndexTuple tuple,
				  Page page,
				  OffsetNumber offset,
				  bool *recheck_p,
				  bool *recheck_distances_p)
{
	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
	GISTSTATE  *giststate = so->giststate;
	ScanKey		key = scan->keyData;
	int			keySize = scan->numberOfKeys;
	double	   *distance_p;
	Relation	r = scan->indexRelation;

	*recheck_p = false;
	*recheck_distances_p = false;

	/*
	 * If it's a leftover invalid tuple from pre-9.1, treat it as a match with
	 * minimum possible distances. This means we'll always follow it to the
	 * referenced page.
	 */
	if (GistTupleIsInvalid(tuple))
	{
		int			i;

		if (GistPageIsLeaf(page))		/* shouldn't happen */
			elog(ERROR, "invalid GiST tuple found on leaf page");
		for (i = 0; i < scan->numberOfOrderBys; i++)
			so->distances[i] = -get_float8_infinity();
		return true;
	}

	/* Check whether it matches according to the Consistent functions */
	while (keySize > 0)
	{
		Datum		datum;
		bool		isNull;

		datum = index_getattr(tuple,
							  key->sk_attno,
							  giststate->tupdesc,
							  &isNull);

		if (key->sk_flags & SK_ISNULL)
		{
			/*
			 * On non-leaf page we can't conclude that child hasn't NULL
			 * values because of assumption in GiST: union (VAL, NULL) is VAL.
			 * But if on non-leaf page key IS NULL, then all children are
			 * NULL.
			 */
			if (key->sk_flags & SK_SEARCHNULL)
			{
				if (GistPageIsLeaf(page) && !isNull)
					return false;
			}
			else
			{
				Assert(key->sk_flags & SK_SEARCHNOTNULL);
				if (isNull)
					return false;
			}
		}
		else if (isNull)
		{
			return false;
		}
		else
		{
			Datum		test;
			bool		recheck;
			GISTENTRY	de;

			gistdentryinit(giststate, key->sk_attno - 1, &de,
						   datum, r, page, offset,
						   false, isNull);

			/*
			 * Call the Consistent function to evaluate the test. The
			 * arguments are the index datum (as a GISTENTRY*), the comparison
			 * datum, the comparison operator's strategy number and subtype
			 * from pg_amop, and the recheck flag.
			 *
			 * (Presently there's no need to pass the subtype since it'll
			 * always be zero, but might as well pass it for possible future
			 * use.)
			 *
			 * We initialize the recheck flag to true (the safest assumption)
			 * in case the Consistent function forgets to set it.
			 */
			recheck = true;

			test = FunctionCall5Coll(&key->sk_func,
									 key->sk_collation,
									 PointerGetDatum(&de),
									 key->sk_argument,
									 Int16GetDatum(key->sk_strategy),
									 ObjectIdGetDatum(key->sk_subtype),
									 PointerGetDatum(&recheck));

			if (!DatumGetBool(test))
				return false;
			*recheck_p |= recheck;
		}

		key++;
		keySize--;
	}

	/* OK, it passes --- now let's compute the distances */
	key = scan->orderByData;
	distance_p = so->distances;
	keySize = scan->numberOfOrderBys;
	while (keySize > 0)
	{
		Datum		datum;
		bool		isNull;

		datum = index_getattr(tuple,
							  key->sk_attno,
							  giststate->tupdesc,
							  &isNull);

		if ((key->sk_flags & SK_ISNULL) || isNull)
		{
			/* Assume distance computes as null and sorts to the end */
			*distance_p = get_float8_infinity();
		}
		else
		{
			Datum		dist;
			bool		recheck;
			GISTENTRY	de;

			gistdentryinit(giststate, key->sk_attno - 1, &de,
						   datum, r, page, offset,
						   false, isNull);

			/*
			 * Call the Distance function to evaluate the distance. The
			 * arguments are the index datum (as a GISTENTRY*), the comparison
			 * datum, the ordering operator's strategy number and subtype from
			 * pg_amop, and the recheck flag.
			 *
			 * (Presently there's no need to pass the subtype since it'll
			 * always be zero, but might as well pass it for possible future
			 * use.)
			 *
			 * If the function sets the recheck flag, the returned distance is
			 * a lower bound on the true distance and needs to be rechecked.
			 * We initialize the flag to 'false'. This flag was added in
			 * version 9.5; distance functions written before that won't know
			 * about the flag, but are expected to never be lossy.
			 */
			recheck = false;
			dist = FunctionCall5Coll(&key->sk_func,
									 key->sk_collation,
									 PointerGetDatum(&de),
									 key->sk_argument,
									 Int16GetDatum(key->sk_strategy),
									 ObjectIdGetDatum(key->sk_subtype),
									 PointerGetDatum(&recheck));
			*recheck_distances_p |= recheck;
			*distance_p = DatumGetFloat8(dist);
		}

		key++;
		distance_p++;
		keySize--;
	}

	return true;
}
/*
 * Search an upper index page for the entry with lowest penalty for insertion
 * of the new index key contained in "it".
 *
 * Returns the index of the page entry to insert into.
 */
OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it,	/* it has compressed entry */
		   GISTSTATE *giststate)
{
	OffsetNumber result;
	OffsetNumber maxoff;
	OffsetNumber i;
	float		best_penalty[INDEX_MAX_KEYS];
	GISTENTRY	entry,
				identry[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	int			keep_current_best;

	Assert(!GistPageIsLeaf(p));

	gistDeCompressAtt(giststate, r,
					  it, NULL, (OffsetNumber) 0,
					  identry, isnull);

	/* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
	result = FirstOffsetNumber;

	/*
	 * The index may have multiple columns, and there's a penalty value for
	 * each column. The penalty associated with a column that appears earlier
	 * in the index definition is strictly more important than the penalty of
	 * a column that appears later in the index definition.
	 *
	 * best_penalty[j] is the best penalty we have seen so far for column j,
	 * or -1 when we haven't yet examined column j. Array entries to the
	 * right of the first -1 are undefined.
	 */
	best_penalty[0] = -1;

	/*
	 * If we find a tuple that's exactly as good as the currently best one, we
	 * could use either one. When inserting a lot of tuples with the same or
	 * similar keys, it's preferable to descend down the same path when
	 * possible, as that's more cache-friendly. On the other hand, if all
	 * inserts land on the same leaf page after a split, we're never going to
	 * insert anything to the other half of the split, and will end up using
	 * only 50% of the available space. Distributing the inserts evenly would
	 * lead to better space usage, but that hurts cache-locality during
	 * insertion. To get the best of both worlds, when we find a tuple that's
	 * exactly as good as the previous best, choose randomly whether to stick
	 * to the old best, or use the new one. Once we decide to stick to the
	 * old best, we keep sticking to it for any subsequent equally good tuples
	 * we might find. This favors tuples with low offsets, but still allows
	 * some inserts to go to other equally-good subtrees.
	 *
	 * keep_current_best is -1 if we haven't yet had to make a random choice
	 * whether to keep the current best tuple. If we have done so, and
	 * decided to keep it, keep_current_best is 1; if we've decided to
	 * replace, keep_current_best is 0. (This state will be reset to -1 as
	 * soon as we've made the replacement, but sometimes we make the choice in
	 * advance of actually finding a replacement best tuple.)
	 */
	keep_current_best = -1;

	/*
	 * Loop over tuples on page.
	 */
	maxoff = PageGetMaxOffsetNumber(p);
	Assert(maxoff >= FirstOffsetNumber);

	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
		bool		zero_penalty;
		int			j;

		zero_penalty = true;

		/* Loop over index attributes. */
		for (j = 0; j < r->rd_att->natts; j++)
		{
			Datum		datum;
			float		usize;
			bool		IsNull;

			/* Compute penalty for this column. */
			datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
			gistdentryinit(giststate, j, &entry, datum, r, p, i,
						   false, IsNull);
			usize = gistpenalty(giststate, j, &entry, IsNull,
								&identry[j], isnull[j]);
			if (usize > 0)
				zero_penalty = false;

			if (best_penalty[j] < 0 || usize < best_penalty[j])
			{
				/*
				 * New best penalty for column. Tentatively select this tuple
				 * as the target, and record the best penalty. Then reset the
				 * next column's penalty to "unknown" (and indirectly, the
				 * same for all the ones to its right). This will force us to
				 * adopt this tuple's penalty values as the best for all the
				 * remaining columns during subsequent loop iterations.
				 */
				result = i;
				best_penalty[j] = usize;

				if (j < r->rd_att->natts - 1)
					best_penalty[j + 1] = -1;

				/* we have new best, so reset keep-it decision */
				keep_current_best = -1;
			}
			else if (best_penalty[j] == usize)
			{
				/*
				 * The current tuple is exactly as good for this column as the
				 * best tuple seen so far. The next iteration of this loop
				 * will compare the next column.
				 */
			}
			else
			{
				/*
				 * The current tuple is worse for this column than the best
				 * tuple seen so far. Skip the remaining columns and move on
				 * to the next tuple, if any.
				 */
				zero_penalty = false;	/* so outer loop won't exit */
				break;
			}
		}

		/*
		 * If we looped past the last column, and did not update "result",
		 * then this tuple is exactly as good as the prior best tuple.
		 */
		if (j == r->rd_att->natts && result != i)
		{
			if (keep_current_best == -1)
			{
				/* we didn't make the random choice yet for this old best */
				keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
			}
			if (keep_current_best == 0)
			{
				/* we choose to use the new tuple */
				result = i;
				/* choose again if there are even more exactly-as-good ones */
				keep_current_best = -1;
			}
		}

		/*
		 * If we find a tuple with zero penalty for all columns, and we've
		 * decided we don't want to search for another tuple with equal
		 * penalty, there's no need to examine remaining tuples; just break
		 * out of the loop and return it.
		 */
		if (zero_penalty)
		{
			if (keep_current_best == -1)
			{
				/* we didn't make the random choice yet for this old best */
				keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
			}
			if (keep_current_best == 1)
				break;
		}
	}

	return result;
}
/*
 * Recompute the union keys of the secondary columns (column indexes 1..natts-1;
 * column 0 is set up by the caller from the picksplit result) for both halves
 * of a split, storing the unions and their sizes into the GIST_SPLITVEC.
 * Entries whose spl_idgrp slot is set are skipped.
 */
static void
gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITVEC *spl)
{
	int			i,
				j,
				lr;
	Datum	   *attr;
	bool	   *needfree,
				IsNull;
	int			len,
			   *attrsize;
	OffsetNumber *entries;
	GistEntryVector *evec;
	Datum		datum;
	int			datumsize;
	int			reallen;
	bool	   *isnull;

	/* process the left half (lr == 1) and the right half (lr == 0) */
	for (lr = 0; lr <= 1; lr++)
	{
		if (lr)
		{
			attrsize = spl->spl_lattrsize;
			attr = spl->spl_lattr;
			len = spl->spl_nleft;
			entries = spl->spl_left;
			isnull = spl->spl_lisnull;
		}
		else
		{
			attrsize = spl->spl_rattrsize;
			attr = spl->spl_rattr;
			len = spl->spl_nright;
			entries = spl->spl_right;
			isnull = spl->spl_risnull;
		}

		/* reserve two slots even for a single entry (self-union below) */
		needfree = (bool *) palloc(((len == 1) ? 2 : len) * sizeof(bool));
		evec = palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ);

		for (j = 1; j < r->rd_att->natts; j++)
		{
			reallen = 0;
			for (i = 0; i < len; i++)
			{
				if (spl->spl_idgrp[entries[i]])
					continue;
				datum = index_getattr(itvec[entries[i] - 1], j + 1,
									  giststate->tupdesc, &IsNull);
				if (IsNull)
					continue;
				gistdentryinit(giststate, j,
							   &(evec->vector[reallen]),
							   datum,
							   NULL, NULL, (OffsetNumber) 0,
							   ATTSIZE(datum, giststate->tupdesc, j + 1, IsNull),
							   FALSE, IsNull);
				/* track freshly palloc'd decompressed keys for later pfree */
				if ((!isAttByVal(giststate, j)) &&
					evec->vector[reallen].key != datum)
					needfree[reallen] = TRUE;
				else
					needfree[reallen] = FALSE;
				reallen++;
			}

			if (reallen == 0)
			{
				datum = (Datum) 0;
				datumsize = 0;
				isnull[j] = true;
			}
			else
			{
				/*
				 * evec->vector[0].bytes may be not defined, so form union
				 * with itself
				 */
				if (reallen == 1)
				{
					evec->n = 2;
					memcpy((void *) &(evec->vector[1]),
						   (void *) &(evec->vector[0]),
						   sizeof(GISTENTRY));
				}
				else
					evec->n = reallen;
				datum = FunctionCall2(&giststate->unionFn[j],
									  PointerGetDatum(evec),
									  PointerGetDatum(&datumsize));
				isnull[j] = false;
			}

			for (i = 0; i < reallen; i++)
				if (needfree[i])
					pfree(DatumGetPointer(evec->vector[i].key));

			attr[j] = datum;
			attrsize[j] = datumsize;
		}
		pfree(evec);
		pfree(needfree);
	}
}
/*
 * return union of itup vector
 *
 * Forms, for each index column, the union of the keys of all tuples in
 * itvec[] and builds a new IndexTuple from the compressed unions.  NULL
 * columns are marked 'n' in the isnull[] vector passed to index_formtuple.
 */
static IndexTuple
gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
{
	Datum		attr[INDEX_MAX_KEYS];
	bool		whatfree[INDEX_MAX_KEYS];
	char		isnull[INDEX_MAX_KEYS];
	GistEntryVector *evec;
	Datum		datum;
	int			datumsize,
				i,
				j;
	GISTENTRY	centry[INDEX_MAX_KEYS];
	bool	   *needfree;
	IndexTuple	newtup;
	bool		IsNull;
	int			reallen;

	/* reserve two slots even for a single tuple (self-union below) */
	needfree = (bool *) palloc(((len == 1) ? 2 : len) * sizeof(bool));
	evec = (GistEntryVector *) palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ);

	for (j = 0; j < r->rd_att->natts; j++)
	{
		reallen = 0;
		for (i = 0; i < len; i++)
		{
			datum = index_getattr(itvec[i], j + 1, giststate->tupdesc, &IsNull);
			if (IsNull)
				continue;

			gistdentryinit(giststate, j,
						   &(evec->vector[reallen]),
						   datum,
						   NULL, NULL, (OffsetNumber) 0,
						   ATTSIZE(datum, giststate->tupdesc, j + 1, IsNull),
						   FALSE, IsNull);
			/* track freshly palloc'd decompressed keys for later pfree */
			if ((!isAttByVal(giststate, j)) &&
				evec->vector[reallen].key != datum)
				needfree[reallen] = TRUE;
			else
				needfree[reallen] = FALSE;
			reallen++;
		}

		if (reallen == 0)
		{
			/* all inputs NULL for this column => union is NULL */
			attr[j] = (Datum) 0;
			isnull[j] = 'n';
			whatfree[j] = FALSE;
		}
		else
		{
			/* unionFn may need two inputs; duplicate a lone entry */
			if (reallen == 1)
			{
				evec->n = 2;
				gistentryinit(evec->vector[1], evec->vector[0].key, r, NULL,
							  (OffsetNumber) 0, evec->vector[0].bytes, FALSE);
			}
			else
				evec->n = reallen;

			datum = FunctionCall2(&giststate->unionFn[j],
								  PointerGetDatum(evec),
								  PointerGetDatum(&datumsize));

			for (i = 0; i < reallen; i++)
				if (needfree[i])
					pfree(DatumGetPointer(evec->vector[i].key));

			/*
			 * Compress the union result into centry[j].  (BUG FIX: the
			 * "&centry[j]" argument had been corrupted to a cent-sign
			 * mojibake by a bad "&cent;" HTML-entity round trip, which
			 * does not compile; restored from the centry[] declaration
			 * and the centry[j].key uses below.)
			 */
			gistcentryinit(giststate, j, &centry[j], datum,
						   NULL, NULL, (OffsetNumber) 0,
						   datumsize, FALSE, FALSE);
			isnull[j] = ' ';
			attr[j] = centry[j].key;
			if (!isAttByVal(giststate, j))
			{
				whatfree[j] = TRUE;
				/* free the uncompressed union if compression copied it */
				if (centry[j].key != datum)
					pfree(DatumGetPointer(datum));
			}
			else
				whatfree[j] = FALSE;
		}
	}

	pfree(evec);
	pfree(needfree);

	newtup = (IndexTuple) index_formtuple(giststate->tupdesc, attr, isnull);
	for (j = 0; j < r->rd_att->natts; j++)
		if (whatfree[j])
			pfree(DatumGetPointer(attr[j]));

	return newtup;
}
/*
 * find entry with lowest penalty
 *
 * Oldest variant: tracks per-column penalties in which_grow[] and frees
 * decompressed keys as it goes.
 *
 * NOTE(review): "which = -1" stores -1 into an unsigned OffsetNumber
 * (wrapping to 65535); unlike later versions there is no fallback to
 * FirstOffsetNumber if no entry is chosen — confirm callers never see an
 * empty page here.
 */
static OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it,	/* it has compressed entry */
		   GISTSTATE *giststate)
{
	OffsetNumber maxoff;
	OffsetNumber i;
	Datum		datum;
	float		usize;
	OffsetNumber which;
	float		sum_grow,
				which_grow[INDEX_MAX_KEYS];
	GISTENTRY	entry,
				identry[INDEX_MAX_KEYS];
	bool		IsNull,
				decompvec[INDEX_MAX_KEYS],
				isnull[INDEX_MAX_KEYS];
	int			j;

	maxoff = PageGetMaxOffsetNumber(p);
	/* -1 in which_grow[0] means "no penalty recorded yet" for column 0 */
	*which_grow = -1.0;
	which = -1;
	/* nonzero sum_grow keeps the outer loop running */
	sum_grow = 1;
	gistDeCompressAtt(giststate, r,
					  it, NULL, (OffsetNumber) 0,
					  identry, decompvec, isnull);

	for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));

		sum_grow = 0;
		for (j = 0; j < r->rd_att->natts; j++)
		{
			datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
			gistdentryinit(giststate, j, &entry, datum, r, p, i,
						   ATTSIZE(datum, giststate->tupdesc, j + 1, IsNull),
						   FALSE, IsNull);
			gistpenalty(giststate, j, &entry, IsNull,
						&identry[j], isnull[j], &usize);

			/* free the decompressed key if it was freshly palloc'd */
			if ((!isAttByVal(giststate, j)) && entry.key != datum)
				pfree(DatumGetPointer(entry.key));

			if (which_grow[j] < 0 || usize < which_grow[j])
			{
				/* new best for this column; invalidate the next column */
				which = i;
				which_grow[j] = usize;
				if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
					which_grow[j + 1] = -1;
				sum_grow += which_grow[j];
			}
			else if (which_grow[j] == usize)
				sum_grow += usize;
			else
			{
				/* worse than current best: abandon this tuple */
				sum_grow = 1;
				break;
			}
		}
	}

	gistFreeAtt(r, identry, decompvec);
	return which;
}
/*
 * gistSplit -- split a page in the tree.
 *
 * Distributes the tuples in itup[] between the existing (left) page and a
 * newly allocated right page using the opclass picksplit function,
 * recursing if either half still does not fit.  Returns a palloc'd array
 * of downlink tuples for the resulting pages and sets *len to its length.
 */
static IndexTuple *
gistSplit(Relation r,
		  Buffer buffer,
		  IndexTuple *itup,		/* contains compressed entry */
		  int *len,
		  GISTSTATE *giststate,
		  InsertIndexResult *res)
{
	Page		p;
	Buffer		leftbuf,
				rightbuf;
	Page		left,
				right;
	IndexTuple *lvectup,
			   *rvectup,
			   *newtup;
	BlockNumber lbknum,
				rbknum;
	GISTPageOpaque opaque;
	GIST_SPLITVEC v;
	GistEntryVector *entryvec;
	bool	   *decompvec;
	int			i,
				j,
				nlen;
	int			MaxGrpId = 1;
	Datum		datum;
	bool		IsNull;

	p = (Page) BufferGetPage(buffer);
	opaque = (GISTPageOpaque) PageGetSpecialPointer(p);

	/*
	 * The root of the tree is the first block in the relation. If we're
	 * about to split the root, we need to do some hocus-pocus to enforce
	 * this guarantee.
	 */
	if (BufferGetBlockNumber(buffer) == GISTP_ROOT)
	{
		leftbuf = ReadBuffer(r, P_NEW);
		GISTInitBuffer(leftbuf, opaque->flags);
		lbknum = BufferGetBlockNumber(leftbuf);
		left = (Page) BufferGetPage(leftbuf);
	}
	else
	{
		leftbuf = buffer;
		IncrBufferRefCount(buffer);
		lbknum = BufferGetBlockNumber(buffer);
		left = (Page) PageGetTempPage(p, sizeof(GISTPageOpaqueData));
	}

	rightbuf = ReadBuffer(r, P_NEW);
	GISTInitBuffer(rightbuf, opaque->flags);
	rbknum = BufferGetBlockNumber(rightbuf);
	right = (Page) BufferGetPage(rightbuf);

	/* generate the item array (1-based, as picksplit expects) */
	entryvec = palloc(GEVHDRSZ + (*len + 1) * sizeof(GISTENTRY));
	entryvec->n = *len + 1;
	decompvec = (bool *) palloc((*len + 1) * sizeof(bool));
	for (i = 1; i <= *len; i++)
	{
		datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
		gistdentryinit(giststate, 0, &(entryvec->vector[i]),
					   datum, r, p, i,
					   ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
					   FALSE, IsNull);
		/* remember which decompressed keys must be freed afterwards */
		if ((!isAttByVal(giststate, 0)) && entryvec->vector[i].key != datum)
			decompvec[i] = TRUE;
		else
			decompvec[i] = FALSE;
	}

	/*
	 * now let the user-defined picksplit function set up the split
	 * vector; in entryvec have no null value!!
	 */
	FunctionCall2(&giststate->picksplitFn[0],
				  PointerGetDatum(entryvec),
				  PointerGetDatum(&v));

	/* compatibility with old code */
	if (v.spl_left[v.spl_nleft - 1] == InvalidOffsetNumber)
		v.spl_left[v.spl_nleft - 1] = (OffsetNumber) *len;
	if (v.spl_right[v.spl_nright - 1] == InvalidOffsetNumber)
		v.spl_right[v.spl_nright - 1] = (OffsetNumber) *len;

	v.spl_lattr[0] = v.spl_ldatum;
	v.spl_rattr[0] = v.spl_rdatum;
	v.spl_lisnull[0] = false;
	v.spl_risnull[0] = false;

	/*
	 * if index is multikey, then we must to try get smaller bounding box
	 * for subkey(s)
	 */
	if (r->rd_att->natts > 1)
	{
		v.spl_idgrp = (int *) palloc0(sizeof(int) * (*len + 1));
		v.spl_grpflag = (char *) palloc0(sizeof(char) * (*len + 1));
		v.spl_ngrp = (int *) palloc(sizeof(int) * (*len + 1));

		MaxGrpId = gistfindgroup(giststate, entryvec->vector, &v);

		/* form union of sub keys for each page (l,p) */
		gistunionsubkey(r, giststate, itup, &v);

		/*
		 * if possible, we insert equivalent tuples with control by
		 * penalty for a subkey(s)
		 */
		if (MaxGrpId > 1)
			gistadjsubkey(r, itup, len, &v, giststate);

		pfree(v.spl_idgrp);
		pfree(v.spl_grpflag);
		pfree(v.spl_ngrp);
	}

	/* clean up the entry vector: its keys need to be deleted, too */
	for (i = 1; i <= *len; i++)
		if (decompvec[i])
			pfree(DatumGetPointer(entryvec->vector[i].key));
	pfree(entryvec);
	pfree(decompvec);

	/* form left and right vector */
	lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nleft);
	rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nright);

	for (i = 0; i < v.spl_nleft; i++)
		lvectup[i] = itup[v.spl_left[i] - 1];
	for (i = 0; i < v.spl_nright; i++)
		rvectup[i] = itup[v.spl_right[i] - 1];

	/* write on disk (may be need another split) */
	if (gistnospace(right, rvectup, v.spl_nright))
	{
		/* right half still too big: recurse */
		nlen = v.spl_nright;
		newtup = gistSplit(r, rightbuf, rvectup, &nlen, giststate,
						   (res && rvectup[nlen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(rightbuf);
		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_risnull[j])
				pfree(DatumGetPointer(v.spl_rattr[j]));
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber);
		WriteBuffer(rightbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), rbknum, l);

		nlen = 1;
		newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1);
		newtup[0] = gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull);
		ItemPointerSet(&(newtup[0]->t_tid), rbknum, 1);
	}

	if (gistnospace(left, lvectup, v.spl_nleft))
	{
		/* left half still too big: recurse */
		int			llen = v.spl_nleft;
		IndexTuple *lntup;

		lntup = gistSplit(r, leftbuf, lvectup, &llen, giststate,
						  (res && lvectup[llen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(leftbuf);

		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_lisnull[j])
				pfree(DatumGetPointer(v.spl_lattr[j]));

		newtup = gistjoinvector(newtup, &nlen, lntup, llen);
		pfree(lntup);
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber);
		if (BufferGetBlockNumber(buffer) != GISTP_ROOT)
			PageRestoreTempPage(left, p);

		WriteBuffer(leftbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), lbknum, l);

		nlen += 1;
		newtup = (IndexTuple *) repalloc((void *) newtup, sizeof(IndexTuple) * nlen);
		newtup[nlen - 1] = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull);
		ItemPointerSet(&(newtup[nlen - 1]->t_tid), lbknum, 1);
	}

	/* !!! pfree */
	pfree(rvectup);
	pfree(lvectup);
	pfree(v.spl_left);
	pfree(v.spl_right);

	*len = nlen;
	return newtup;
}
/*
 * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
 *
 * We must decompress the key in the IndexTuple before passing it to the
 * sk_func (and we have previously overwritten the sk_func to use the
 * user-defined Consistent method, so we actually are invoking that).
 *
 * Note that this function is always invoked in a short-lived memory context,
 * so we don't need to worry about cleaning up allocated memory, either here
 * or in the implementation of any Consistent methods.
 *
 * Returns true if the tuple may satisfy every scan key (i.e. the subtree or
 * heap tuple it points to must still be visited), false as soon as any key
 * definitely fails.
 */
static bool
gistindex_keytest(IndexTuple tuple,
				  IndexScanDesc scan,
				  OffsetNumber offset)
{
	int			keySize = scan->numberOfKeys;
	ScanKey		key = scan->keyData;
	Relation	r = scan->indexRelation;
	GISTScanOpaque so;
	Page		p;
	GISTSTATE  *giststate;

	so = (GISTScanOpaque) scan->opaque;
	giststate = so->giststate;
	p = BufferGetPage(so->curbuf);

	IncrIndexProcessed();

	/*
	 * An invalid internal tuple marks a subtree left in an inconsistent
	 * state by an incomplete insert during crash recovery; we must descend
	 * into it unconditionally, so report "matches".
	 */
	if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
		return true;

	/* Test the tuple against each scan key in turn; all must pass. */
	while (keySize > 0)
	{
		Datum		datum;
		bool		isNull;
		Datum		test;
		GISTENTRY	de;

		datum = index_getattr(tuple,
							  key->sk_attno,
							  giststate->tupdesc,
							  &isNull);

		if (key->sk_flags & SK_ISNULL)
		{
			/*
			 * On a non-leaf page we can't conclude that a child has no NULL
			 * values, because of the GiST assumption that union(VAL, NULL)
			 * is VAL.  But if a key on a non-leaf page IS NULL, then all its
			 * children are NULL, so we can keep descending.
			 */
			Assert(key->sk_flags & SK_SEARCHNULL);

			if (GistPageIsLeaf(p) && !isNull)
				return false;	/* leaf key is not NULL -> fails IS NULL */
		}
		else if (isNull)
		{
			/* Non-null search condition can never match a NULL index key. */
			return false;
		}
		else
		{
			/* Decompress the stored key before handing it to the opclass. */
			gistdentryinit(giststate, key->sk_attno - 1, &de,
						   datum, r, p, offset,
						   FALSE, isNull);

			/*
			 * Call the Consistent function to evaluate the test.  The
			 * arguments are the index datum (as a GISTENTRY*), the comparison
			 * datum, and the comparison operator's strategy number and
			 * subtype from pg_amop.
			 *
			 * (Presently there's no need to pass the subtype since it'll
			 * always be zero, but might as well pass it for possible future
			 * use.)
			 */
			test = FunctionCall4(&key->sk_func,
								 PointerGetDatum(&de),
								 key->sk_argument,
								 Int32GetDatum(key->sk_strategy),
								 ObjectIdGetDatum(key->sk_subtype));

			if (!DatumGetBool(test))
				return false;
		}

		keySize--;
		key++;
	}
	return true;
}
/*
 * gistSplitByKey -- tries to split the page by the key in column 'attno';
 * if NULL values are present, they are moved to a separate (right) page.
 *
 * On success the chosen distribution of tuples is returned in *v.  The
 * routine recurses to the next column when the current column cannot
 * discriminate (all-NULL, or the user picksplit left "equivalent" tuples
 * that could go to either side).
 *
 * NOTE(review): offNullTuples is 'static', so this function keeps per-call
 * state in shared storage — presumably safe because splits are serialized
 * per backend; confirm this is never reached reentrantly.
 */
void
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
			   GISTSTATE *giststate, GistSplitVector *v,
			   GistEntryVector *entryvec, int attno)
{
	int			i;
	/* offsets (1-based) of tuples whose attno key is NULL */
	static OffsetNumber offNullTuples[MaxOffsetNumber];
	int			nOffNullTuples = 0;

	/*
	 * Decompress the attno column of every tuple into entryvec (1-based,
	 * matching offset numbers), collecting the offsets of NULL keys.
	 */
	for (i = 1; i <= len; i++)
	{
		Datum		datum;
		bool		IsNull;

		/*
		 * An invalid tuple (left over from an incomplete insert during
		 * crash recovery) on an internal page forces the fallback split.
		 */
		if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
		{
			gistSplitByInvalid(giststate, v, itup, len);
			return;
		}

		datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc,
							  &IsNull);
		gistdentryinit(giststate, attno, &(entryvec->vector[i]),
					   datum, r, page, i,
					   FALSE, IsNull);
		if (IsNull)
			offNullTuples[nOffNullTuples++] = i;
	}

	v->spl_leftvalid = v->spl_rightvalid = true;

	if (nOffNullTuples == len)
	{
		/*
		 * Corner case: all keys in the attno column are NULL, so we should
		 * try to split by the keys in the next column.  If the keys in all
		 * columns are NULL, just split the page half and half.
		 */
		v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;

		if (attno + 1 == r->rd_att->natts)
			gistSplitHalf(&v->splitVector, len);
		else
			gistSplitByKey(r, page, itup, len, giststate, v, entryvec,
						   attno + 1);
	}
	else if (nOffNullTuples > 0)
	{
		int			j = 0;

		/*
		 * We don't want to mix NULL and non-NULL keys on one page, so move
		 * all the NULLs to the right page.  Note spl_right aliases the
		 * static offNullTuples array rather than a palloc'd copy.
		 */
		v->splitVector.spl_right = offNullTuples;
		v->splitVector.spl_nright = nOffNullTuples;
		v->spl_risnull[attno] = TRUE;

		/* Left page gets every offset not listed in offNullTuples. */
		v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
		v->splitVector.spl_nleft = 0;
		for (i = 1; i <= len; i++)
			if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
				j++;			/* skip: i is a NULL-key offset */
			else
				v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;

		v->spl_equiv = NULL;
		gistunionsubkey(giststate, itup, v, attno);
	}
	else
	{
		/*
		 * All keys are non-NULL: let the opclass's picksplit method choose
		 * the distribution.
		 */
		entryvec->n = len + 1;

		if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) &&
			attno + 1 != r->rd_att->natts)
		{
			/*
			 * The split on the attno column is not optimal: there are tuples
			 * that could freely go to either the left or the right page, so
			 * we try to resolve them by splitting on the following columns.
			 */
			if (v->spl_equiv == NULL)
			{
				/*
				 * Simple case: the left and right keys for the attno column
				 * are equal, so re-split the whole set on the next column.
				 */
				gistSplitByKey(r, page, itup, len, giststate, v, entryvec,
							   attno + 1);
			}
			else
			{
				/* We should remove already-distributed tuples from the set. */
				IndexTuple *newitup = (IndexTuple *) palloc((len + 1) * sizeof(IndexTuple));
				/*
				 * map[k] = original 1-based offset of the k-th undecided
				 * tuple.  NOTE(review): allocated with sizeof(IndexTuple)
				 * although it holds OffsetNumber — over-allocation only, but
				 * looks unintentional; confirm.
				 */
				OffsetNumber *map = (OffsetNumber *) palloc((len + 1) * sizeof(IndexTuple));
				int			newlen = 0;
				GIST_SPLITVEC backupSplit = v->splitVector;

				/* Collect only the undecided ("equivalent") tuples. */
				for (i = 0; i < len; i++)
					if (v->spl_equiv[i + 1])
					{
						map[newlen] = i + 1;
						newitup[newlen++] = itup[i];
					}

				Assert(newlen > 0);

				/*
				 * Copy the already-decided halves into private arrays sized
				 * for the worst case, so the recursive call below can safely
				 * overwrite v->splitVector.
				 */
				backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
				memcpy(backupSplit.spl_left, v->splitVector.spl_left,
					   sizeof(OffsetNumber) * v->splitVector.spl_nleft);
				backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
				memcpy(backupSplit.spl_right, v->splitVector.spl_right,
					   sizeof(OffsetNumber) * v->splitVector.spl_nright);

				/* Recurse on the undecided subset using the next column. */
				gistSplitByKey(r, page, newitup, newlen, giststate, v,
							   entryvec, attno + 1);

				/* Merge the result of the subsplit, mapping offsets back. */
				for (i = 0; i < v->splitVector.spl_nleft; i++)
					backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
				for (i = 0; i < v->splitVector.spl_nright; i++)
					backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];

				v->splitVector = backupSplit;

				/* Recompute the union of left and right datums. */
				gistunionsubkey(giststate, itup, v, attno);
			}
		}
	}
}