/*
 * Fallback split that ignores geometry: the first half of the entries (by
 * offset number) is assigned to the left page, the rest to the right page.
 *
 * entryvec - entries to distribute; offsets FirstOffsetNumber .. n - 1 are
 *			  read.
 * v		- split vector to fill in.  spl_left/spl_right are freshly
 *			  palloc'd, spl_ldatum/spl_rdatum receive the union box of each
 *			  side (merged with a pre-existing datum when the matching
 *			  spl_*datum_exists flag is set), and both flags are cleared.
 *
 * Note: with fewer than two entries nothing lands on the left side, so the
 * left union remains NULL.
 */
static void
fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
{
	OffsetNumber lastOff = entryvec->n - 1;
	OffsetNumber off;
	int			listBytes = (lastOff + 2) * sizeof(OffsetNumber);
	int			leftLimit = (lastOff - FirstOffsetNumber + 1) / 2;
	BOX		   *leftBox = NULL;
	BOX		   *rightBox = NULL;

	v->spl_left = (OffsetNumber *) palloc(listBytes);
	v->spl_right = (OffsetNumber *) palloc(listBytes);
	v->spl_nleft = 0;
	v->spl_nright = 0;

	/* Offsets up to and including leftLimit go to the left page ... */
	for (off = FirstOffsetNumber; off <= leftLimit; off = OffsetNumberNext(off))
	{
		BOX		   *entryBox = DatumGetBoxP(entryvec->vector[off].key);

		v->spl_left[v->spl_nleft] = off;
		if (leftBox != NULL)
			adjustBox(leftBox, entryBox);
		else
		{
			/* first entry on this side seeds the union */
			leftBox = (BOX *) palloc(sizeof(BOX));
			*leftBox = *entryBox;
		}
		v->spl_nleft++;
	}

	/* ... and every remaining offset goes to the right page. */
	for (; off <= lastOff; off = OffsetNumberNext(off))
	{
		BOX		   *entryBox = DatumGetBoxP(entryvec->vector[off].key);

		v->spl_right[v->spl_nright] = off;
		if (rightBox != NULL)
			adjustBox(rightBox, entryBox);
		else
		{
			rightBox = (BOX *) palloc(sizeof(BOX));
			*rightBox = *entryBox;
		}
		v->spl_nright++;
	}

	/* Fold in any union datum the caller already had for each side. */
	if (v->spl_ldatum_exists)
		adjustBox(leftBox, DatumGetBoxP(v->spl_ldatum));
	v->spl_ldatum = BoxPGetDatum(leftBox);

	if (v->spl_rdatum_exists)
		adjustBox(rightBox, DatumGetBoxP(v->spl_rdatum));
	v->spl_rdatum = BoxPGetDatum(rightBox);

	v->spl_rdatum_exists = false;
	v->spl_ldatum_exists = false;
}
/* * SP-GiST choose function */ Datum spg_box_quad_choose(PG_FUNCTION_ARGS) { spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); BOX *centroid = DatumGetBoxP(in->prefixDatum), *box = DatumGetBoxP(in->datum); out->resultType = spgMatchNode; out->result.matchNode.restDatum = BoxPGetDatum(box); /* nodeN will be set by core, when allTheSame. */ if (!in->allTheSame) out->result.matchNode.nodeN = getQuadrant(centroid, box); PG_RETURN_VOID(); }
Datum gist_point_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); if (entry->leafkey) /* Point, actually */ { BOX *box = palloc(sizeof(BOX)); Point *point = DatumGetPointP(entry->key); GISTENTRY *retval = palloc(sizeof(GISTENTRY)); box->high = box->low = *point; gistentryinit(*retval, BoxPGetDatum(box), entry->rel, entry->page, entry->offset, FALSE); PG_RETURN_POINTER(retval); } PG_RETURN_POINTER(entry); }
/* * SP-GiST pick-split function * * It splits a list of boxes into quadrants by choosing a central 4D * point as the median of the coordinates of the boxes. */ Datum spg_box_quad_picksplit(PG_FUNCTION_ARGS) { spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); BOX *centroid; int median, i; double *lowXs = palloc(sizeof(double) * in->nTuples); double *highXs = palloc(sizeof(double) * in->nTuples); double *lowYs = palloc(sizeof(double) * in->nTuples); double *highYs = palloc(sizeof(double) * in->nTuples); /* Calculate median of all 4D coordinates */ for (i = 0; i < in->nTuples; i++) { BOX *box = DatumGetBoxP(in->datums[i]); lowXs[i] = box->low.x; highXs[i] = box->high.x; lowYs[i] = box->low.y; highYs[i] = box->high.y; } qsort(lowXs, in->nTuples, sizeof(double), compareDoubles); qsort(highXs, in->nTuples, sizeof(double), compareDoubles); qsort(lowYs, in->nTuples, sizeof(double), compareDoubles); qsort(highYs, in->nTuples, sizeof(double), compareDoubles); median = in->nTuples / 2; centroid = palloc(sizeof(BOX)); centroid->low.x = lowXs[median]; centroid->high.x = highXs[median]; centroid->low.y = lowYs[median]; centroid->high.y = highYs[median]; /* Fill the output */ out->hasPrefix = true; out->prefixDatum = BoxPGetDatum(centroid); out->nNodes = 16; out->nodeLabels = NULL; /* We don't need node labels. */ out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); /* * Assign ranges to corresponding nodes according to quadrants * relative to the "centroid" range */ for (i = 0; i < in->nTuples; i++) { BOX *box = DatumGetBoxP(in->datums[i]); uint8 quadrant = getQuadrant(centroid, box); out->leafTupleDatums[i] = BoxPGetDatum(box); out->mapTuplesToNodes[i] = quadrant; } PG_RETURN_VOID(); }
/*
 * The GiST PickSplit method
 *
 * New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree',
 * C.H.Ang and T.C.Tan
 *
 * This is used for both boxes and points.
 *
 * Argument 0 is the GistEntryVector holding the entries to distribute
 * (offsets FirstOffsetNumber .. n - 1 are read); argument 1 is the
 * GIST_SPLITVEC to fill in, which is also the return value.
 *
 * Outline:
 *	1. Compute the bounding box of all entries ("pageunion") and detect the
 *	   case where every entry has identical coordinates (-> fallbackSplit).
 *	2. Distribute every entry twice: into a Left/Right pair by comparing its
 *	   distances to the low-x and high-x edges of pageunion, and into a
 *	   Bottom/Top pair by doing the same along y.
 *	3. If both distributions are badly unbalanced, redistribute by comparing
 *	   each box center with the average center; if that is still bad, use
 *	   fallbackSplit.
 *	4. Pick the axis whose larger group is smaller; on a tie, pick by the
 *	   size_box() of the rt_box_inter() of the two group unions.  chooseLR()
 *	   then stores the result in the split vector.
 */
Datum
gist_box_picksplit(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber i;
	/* per-group offset lists: Left, Right, Bottom, Top */
	OffsetNumber *listL,
			   *listR,
			   *listB,
			   *listT;
	/* per-group union boxes */
	BOX		   *unionL,
			   *unionR,
			   *unionB,
			   *unionT;
	/* per-group entry counts (also the next free slot in each list) */
	int			posL,
				posR,
				posB,
				posT;
	BOX			pageunion;		/* bounding box of all entries */
	BOX		   *cur;			/* current entry; read implicitly by ADDLIST */
	char		direction = ' ';
	bool		allisequal = true;
	OffsetNumber maxoff;
	int			nbytes;

	posL = posR = posB = posT = 0;
	maxoff = entryvec->n - 1;

	/* seed the bounding box with the first entry */
	cur = DatumGetBoxP(entryvec->vector[FirstOffsetNumber].key);
	memcpy((void *) &pageunion, (void *) cur, sizeof(BOX));

	/* find MBR */
	for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i))
	{
		cur = DatumGetBoxP(entryvec->vector[i].key);
		/*
		 * While allisequal still holds, pageunion equals the first entry, so
		 * this compares each entry against the first one.
		 */
		if (allisequal && (
						   pageunion.high.x != cur->high.x ||
						   pageunion.high.y != cur->high.y ||
						   pageunion.low.x != cur->low.x ||
						   pageunion.low.y != cur->low.y
						   ))
			allisequal = false;

		adjustBox(&pageunion, cur);
	}

	if (allisequal)
	{
		/*
		 * All entries are the same
		 */
		fallbackSplit(entryvec, v);
		PG_RETURN_POINTER(v);
	}

	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	listL = (OffsetNumber *) palloc(nbytes);
	listR = (OffsetNumber *) palloc(nbytes);
	listB = (OffsetNumber *) palloc(nbytes);
	listT = (OffsetNumber *) palloc(nbytes);
	unionL = (BOX *) palloc(sizeof(BOX));
	unionR = (BOX *) palloc(sizeof(BOX));
	unionB = (BOX *) palloc(sizeof(BOX));
	unionT = (BOX *) palloc(sizeof(BOX));

/*
 * ADDLIST appends offset "num" to "list" at index "pos", increments "pos",
 * and folds the box pointed to by the local variable "cur" into "unionD":
 * the first entry of a group (pos == 0) is copied, later ones only enlarge
 * the union.
 */
#define ADDLIST( list, unionD, pos, num ) do { \
	if ( pos ) { \
		if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x	= cur->high.x; \
		if ( (unionD)->low.x  > cur->low.x	) (unionD)->low.x	= cur->low.x; \
		if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y	= cur->high.y; \
		if ( (unionD)->low.y  > cur->low.y	) (unionD)->low.y	= cur->low.y; \
	} else { \
			memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) );	\
	} \
	(list)[pos] = num; \
	(pos)++; \
} while(0)

	/*
	 * First distribution: an entry goes Left when its low-x edge is closer to
	 * the page's low-x edge than its high-x edge is to the page's high-x
	 * edge, otherwise Right; likewise Bottom/Top along y.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		cur = DatumGetBoxP(entryvec->vector[i].key);
		if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x)
			ADDLIST(listL, unionL, posL, i);
		else
			ADDLIST(listR, unionR, posR, i);
		if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y)
			ADDLIST(listB, unionB, posB, i);
		else
			ADDLIST(listT, unionT, posT, i);
	}

/*
 * A pair of group sizes is "bad" when either group is empty or one group is
 * less than LIMIT_RATIO times the size of the other.
 */
#define LIMIT_RATIO 0.1
#define _IS_BADRATIO(x,y)	( (y) == 0 || (float)(x)/(float)(y) < LIMIT_RATIO )
#define IS_BADRATIO(x,y) ( _IS_BADRATIO((x),(y)) || _IS_BADRATIO((y),(x)) )
	/* bad disposition, try to split by centers of boxes  */
	if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB))
	{
		double		avgCenterX = 0.0,
					avgCenterY = 0.0;
		double		CenterX,
					CenterY;

		/* average of all box centers */
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			cur = DatumGetBoxP(entryvec->vector[i].key);
			avgCenterX += ((double) cur->high.x + (double) cur->low.x) / 2.0;
			avgCenterY += ((double) cur->high.y + (double) cur->low.y) / 2.0;
		}

		avgCenterX /= maxoff;
		avgCenterY /= maxoff;

		/* start both distributions over; ADDLIST re-seeds unions at pos 0 */
		posL = posR = posB = posT = 0;
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			cur = DatumGetBoxP(entryvec->vector[i].key);

			CenterX = ((double) cur->high.x + (double) cur->low.x) / 2.0;
			CenterY = ((double) cur->high.y + (double) cur->low.y) / 2.0;

			if (CenterX < avgCenterX)
				ADDLIST(listL, unionL, posL, i);
			else if (CenterX == avgCenterX)
			{
				/* exactly on the average: give it to the smaller group */
				if (posL > posR)
					ADDLIST(listR, unionR, posR, i);
				else
					ADDLIST(listL, unionL, posL, i);
			}
			else
				ADDLIST(listR, unionR, posR, i);

			if (CenterY < avgCenterY)
				ADDLIST(listB, unionB, posB, i);
			else if (CenterY == avgCenterY)
			{
				/* exactly on the average: give it to the smaller group */
				if (posB > posT)
					ADDLIST(listT, unionT, posT, i);
				else
					ADDLIST(listB, unionB, posB, i);
			}
			else
				ADDLIST(listT, unionT, posT, i);
		}

		/* still unbalanced on both axes: give up on geometry */
		if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB))
		{
			fallbackSplit(entryvec, v);
			PG_RETURN_POINTER(v);
		}
	}

	/*
	 * Which split is more optimal?  Prefer the axis whose larger group is
	 * smaller; on a tie, compare size_box() of the intersection of the two
	 * group unions and prefer the axis with the smaller value.
	 */
	if (Max(posL, posR) < Max(posB, posT))
		direction = 'x';
	else if (Max(posL, posR) > Max(posB, posT))
		direction = 'y';
	else
	{
		Datum		interLR = DirectFunctionCall2(rt_box_inter,
												  BoxPGetDatum(unionL),
												  BoxPGetDatum(unionR));
		Datum		interBT = DirectFunctionCall2(rt_box_inter,
												  BoxPGetDatum(unionB),
												  BoxPGetDatum(unionT));
		double		sizeLR,
					sizeBT;

		sizeLR = size_box(interLR);
		sizeBT = size_box(interBT);

		if (sizeLR < sizeBT)
			direction = 'x';
		else
			direction = 'y';
	}

	/* chooseLR decides which group becomes the left page and fills in v */
	if (direction == 'x')
		chooseLR(v,
				 listL, posL, unionL,
				 listR, posR, unionR);
	else
		chooseLR(v,
				 listB, posB, unionB,
				 listT, posT, unionT);

	PG_RETURN_POINTER(v);
}
/*
 * Store a two-group split in "v", deciding which group becomes the left page.
 *
 * list1/nlist1/union1 and list2/nlist2/union2 describe the two groups (offset
 * list, entry count, union box).  By default group 1 goes left.  When v
 * already carries a left and/or right union datum (spl_ldatum_exists /
 * spl_rdatum_exists), the assignment may be swapped:
 *	- both exist: compare size_box() of the rt_box_inter() of the resulting
 *	  left/right unions for each assignment and swap if the default one is
 *	  larger;
 *	- only one exists: call gist_box_penalty for adding each group's union to
 *	  the existing datum, and swap if the other group gives the lower result
 *	  for that side.
 *
 * On return the existing datums have been merged into the chosen unions and
 * both spl_*datum_exists flags are cleared.  The union boxes passed in are
 * modified in place and referenced from v.
 */
static void
chooseLR(GIST_SPLITVEC *v,
		 OffsetNumber *list1, int nlist1, BOX *union1,
		 OffsetNumber *list2, int nlist2, BOX *union2)
{
	bool		swapSides = false;
	OffsetNumber *leftList;
	OffsetNumber *rightList;
	int			leftCount;
	int			rightCount;
	BOX		   *leftUnion;
	BOX		   *rightUnion;

	if (v->spl_ldatum_exists && v->spl_rdatum_exists)
	{
		/* candidate unions for "1 -> left, 2 -> right" and the reverse */
		BOX			keepLeft = *union1;
		BOX			keepRight = *union2;
		BOX			swapLeft = *union2;
		BOX			swapRight = *union1;
		double		keepSize;
		double		swapSize;

		adjustBox(&keepLeft, DatumGetBoxP(v->spl_ldatum));
		adjustBox(&keepRight, DatumGetBoxP(v->spl_rdatum));

		adjustBox(&swapLeft, DatumGetBoxP(v->spl_ldatum));
		adjustBox(&swapRight, DatumGetBoxP(v->spl_rdatum));

		keepSize = size_box(DirectFunctionCall2(rt_box_inter,
												BoxPGetDatum(&keepLeft),
												BoxPGetDatum(&keepRight)));
		swapSize = size_box(DirectFunctionCall2(rt_box_inter,
												BoxPGetDatum(&swapLeft),
												BoxPGetDatum(&swapRight)));

		swapSides = (keepSize > swapSize);
	}
	else if (v->spl_ldatum_exists || v->spl_rdatum_exists)
	{
		/* exactly one side already has a union datum */
		float		penalty1;
		float		penalty2;
		GISTENTRY	existing;
		GISTENTRY	candidate;

		gistentryinit(existing,
					  (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
					  NULL, NULL, InvalidOffsetNumber, FALSE);

		gistentryinit(candidate, BoxPGetDatum(union1),
					  NULL, NULL, InvalidOffsetNumber, FALSE);
		DirectFunctionCall3(gist_box_penalty,
							PointerGetDatum(&existing),
							PointerGetDatum(&candidate),
							PointerGetDatum(&penalty1));

		gistentryinit(candidate, BoxPGetDatum(union2),
					  NULL, NULL, InvalidOffsetNumber, FALSE);
		DirectFunctionCall3(gist_box_penalty,
							PointerGetDatum(&existing),
							PointerGetDatum(&candidate),
							PointerGetDatum(&penalty2));

		/*
		 * The existing datum's side should receive the group with the lower
		 * penalty: group 2 if the datum is the left one, group 1 otherwise.
		 */
		if (v->spl_ldatum_exists)
			swapSides = (penalty1 > penalty2);
		else
			swapSides = (penalty1 < penalty2);
	}

	if (swapSides)
	{
		leftList = list2;
		leftCount = nlist2;
		leftUnion = union2;
		rightList = list1;
		rightCount = nlist1;
		rightUnion = union1;
	}
	else
	{
		leftList = list1;
		leftCount = nlist1;
		leftUnion = union1;
		rightList = list2;
		rightCount = nlist2;
		rightUnion = union2;
	}

	v->spl_left = leftList;
	v->spl_right = rightList;
	v->spl_nleft = leftCount;
	v->spl_nright = rightCount;

	/* merge pre-existing datums into the unions that now own each side */
	if (v->spl_ldatum_exists)
		adjustBox(leftUnion, DatumGetBoxP(v->spl_ldatum));
	v->spl_ldatum = BoxPGetDatum(leftUnion);

	if (v->spl_rdatum_exists)
		adjustBox(rightUnion, DatumGetBoxP(v->spl_rdatum));
	v->spl_rdatum = BoxPGetDatum(rightUnion);

	v->spl_rdatum_exists = false;
	v->spl_ldatum_exists = false;
}
/* ** The GiST PickSplit method ** New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree', ** C.H.Ang and T.C.Tan */ Datum gbox_picksplit(PG_FUNCTION_ARGS) { bytea *entryvec = (bytea *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i; OffsetNumber *listL, *listR, *listB, *listT; BOX *unionL, *unionR, *unionB, *unionT; int posL, posR, posB, posT; BOX pageunion; BOX *cur; char direction = ' '; bool allisequal = true; OffsetNumber maxoff; int nbytes; posL = posR = posB = posT = 0; maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 1; cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[FirstOffsetNumber].key); memcpy((void *) &pageunion, (void *) cur, sizeof(BOX)); /* find MBR */ for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key); if (allisequal == true && ( pageunion.high.x != cur->high.x || pageunion.high.y != cur->high.y || pageunion.low.x != cur->low.x || pageunion.low.y != cur->low.y )) allisequal = false; if (pageunion.high.x < cur->high.x) pageunion.high.x = cur->high.x; if (pageunion.low.x > cur->low.x) pageunion.low.x = cur->low.x; if (pageunion.high.y < cur->high.y) pageunion.high.y = cur->high.y; if (pageunion.low.y > cur->low.y) pageunion.low.y = cur->low.y; } nbytes = (maxoff + 2) * sizeof(OffsetNumber); listL = (OffsetNumber *) palloc(nbytes); listR = (OffsetNumber *) palloc(nbytes); unionL = (BOX *) palloc(sizeof(BOX)); unionR = (BOX *) palloc(sizeof(BOX)); if (allisequal) { cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[OffsetNumberNext(FirstOffsetNumber)].key); if (memcmp((void *) cur, (void *) &pageunion, sizeof(BOX)) == 0) { v->spl_left = listL; v->spl_right = listR; v->spl_nleft = v->spl_nright = 0; memcpy((void *) unionL, (void *) &pageunion, sizeof(BOX)); memcpy((void *) unionR, (void *) &pageunion, sizeof(BOX)); for (i = FirstOffsetNumber; i <= maxoff; i = 
OffsetNumberNext(i)) { if (i <= (maxoff - FirstOffsetNumber + 1) / 2) { v->spl_left[v->spl_nleft] = i; v->spl_nleft++; } else { v->spl_right[v->spl_nright] = i; v->spl_nright++; } } v->spl_ldatum = BoxPGetDatum(unionL); v->spl_rdatum = BoxPGetDatum(unionR); PG_RETURN_POINTER(v); } } listB = (OffsetNumber *) palloc(nbytes); listT = (OffsetNumber *) palloc(nbytes); unionB = (BOX *) palloc(sizeof(BOX)); unionT = (BOX *) palloc(sizeof(BOX)); #define ADDLIST( list, unionD, pos, num ) do { \ if ( pos ) { \ if ( unionD->high.x < cur->high.x ) unionD->high.x = cur->high.x; \ if ( unionD->low.x > cur->low.x ) unionD->low.x = cur->low.x; \ if ( unionD->high.y < cur->high.y ) unionD->high.y = cur->high.y; \ if ( unionD->low.y > cur->low.y ) unionD->low.y = cur->low.y; \ } else { \ memcpy( (void*)unionD, (void*) cur, sizeof( BOX ) ); \ } \ list[pos] = num; \ (pos)++; \ } while(0) for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key); if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, i); else ADDLIST(listR, unionR, posR, i); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, i); else ADDLIST(listT, unionT, posT, i); } /* bad disposition, sort by ascending and resplit */ if ((posR == 0 || posL == 0) && (posT == 0 || posB == 0)) { KBsort *arr = (KBsort *) palloc(sizeof(KBsort) * maxoff); posL = posR = posB = posT = 0; for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { arr[i - 1].key = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key); arr[i - 1].pos = i; } qsort(arr, maxoff, sizeof(KBsort), compare_KB); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = arr[i - 1].key; if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, arr[i - 1].pos); else if (cur->low.x - pageunion.low.x == pageunion.high.x - cur->high.x) { if (posL > posR) 
ADDLIST(listR, unionR, posR, arr[i - 1].pos); else ADDLIST(listL, unionL, posL, arr[i - 1].pos); } else ADDLIST(listR, unionR, posR, arr[i - 1].pos); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, arr[i - 1].pos); else if (cur->low.y - pageunion.low.y == pageunion.high.y - cur->high.y) { if (posB > posT) ADDLIST(listT, unionT, posT, arr[i - 1].pos); else ADDLIST(listB, unionB, posB, arr[i - 1].pos); } else ADDLIST(listT, unionT, posT, arr[i - 1].pos); } pfree(arr); } /* which split more optimal? */ if (Max(posL, posR) < Max(posB, posT)) direction = 'x'; else if (Max(posL, posR) > Max(posB, posT)) direction = 'y'; else { Datum interLR = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionL), BoxPGetDatum(unionR)); Datum interBT = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionB), BoxPGetDatum(unionT)); float sizeLR, sizeBT; sizeLR = size_box(interLR); sizeBT = size_box(interBT); if (sizeLR < sizeBT) direction = 'x'; else direction = 'y'; } if (direction == 'x') { pfree(unionB); pfree(listB); pfree(unionT); pfree(listT); v->spl_left = listL; v->spl_right = listR; v->spl_nleft = posL; v->spl_nright = posR; v->spl_ldatum = BoxPGetDatum(unionL); v->spl_rdatum = BoxPGetDatum(unionR); } else { pfree(unionR); pfree(listR); pfree(unionL); pfree(listL); v->spl_left = listB; v->spl_right = listT; v->spl_nleft = posB; v->spl_nright = posT; v->spl_ldatum = BoxPGetDatum(unionB); v->spl_rdatum = BoxPGetDatum(unionT); } PG_RETURN_POINTER(v); }