/* * Trivial split: half of entries will be placed on one page * and another half - to another */ static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v) { OffsetNumber i, maxoff; BOX *unionL = NULL, *unionR = NULL; int nbytes; maxoff = entryvec->n - 1; nbytes = (maxoff + 2) * sizeof(OffsetNumber); v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes); v->spl_nleft = v->spl_nright = 0; for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { BOX *cur = DatumGetBoxP(entryvec->vector[i].key); if (i <= (maxoff - FirstOffsetNumber + 1) / 2) { v->spl_left[v->spl_nleft] = i; if (unionL == NULL) { unionL = (BOX *) palloc(sizeof(BOX)); *unionL = *cur; } else adjustBox(unionL, cur); v->spl_nleft++; } else { v->spl_right[v->spl_nright] = i; if (unionR == NULL) { unionR = (BOX *) palloc(sizeof(BOX)); *unionR = *cur; } else adjustBox(unionR, cur); v->spl_nright++; } } if (v->spl_ldatum_exists) adjustBox(unionL, DatumGetBoxP(v->spl_ldatum)); v->spl_ldatum = BoxPGetDatum(unionL); if (v->spl_rdatum_exists) adjustBox(unionR, DatumGetBoxP(v->spl_rdatum)); v->spl_rdatum = BoxPGetDatum(unionR); v->spl_ldatum_exists = v->spl_rdatum_exists = false; }
/* * Trivial split: half of entries will be placed on one page * and another half - to another */ static void fallbackSplit(struct gist_entry_vector *entryvec, struct gist_splitvec *v) { item_id_t i, maxoff; BOX *unionL = NULL; BOX *unionR = NULL; int nbytes; maxoff = entryvec->n - 1; nbytes = (maxoff + 2) * sizeof(item_id_t); v->spl_left = (item_id_t *) palloc(nbytes); v->spl_right = (item_id_t *) palloc(nbytes); v->spl_nleft = v->spl_nright = 0; for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) { BOX *cur; cur = D_TO_BOX_P(entryvec->vector[i].key); if (i <= (maxoff - FIRST_ITEM_ID + 1) / 2) { v->spl_left[v->spl_nleft] = i; if (unionL == NULL) { unionL = (BOX *) palloc(sizeof(BOX)); *unionL = *cur; } else { adjustBox(unionL, cur); } v->spl_nleft++; } else { v->spl_right[v->spl_nright] = i; if (unionR == NULL) { unionR = (BOX *) palloc(sizeof(BOX)); *unionR = *cur; } else { adjustBox(unionR, cur); } v->spl_nright++; } } if (v->spl_ldatum_exists) adjustBox(unionL, D_TO_BOX_P(v->spl_ldatum)); v->spl_ldatum = BOX_P_TO_D(unionL); if (v->spl_rdatum_exists) adjustBox(unionR, D_TO_BOX_P(v->spl_rdatum)); v->spl_rdatum = BOX_P_TO_D(unionR); v->spl_ldatum_exists = v->spl_rdatum_exists = false; }
/* * The GiST Union method for boxes * * returns the minimal bounding box that encloses all the entries in entryvec */ datum_t gist_box_union(PG_FUNC_ARGS) { struct gist_entry_vector* entryvec = (struct gist_entry_vector*) ARG_POINTER(0); int* sizep = (int*) ARG_POINTER(1); int numranges, i; BOX *cur; BOX *pageunion; numranges = entryvec->n; pageunion = (BOX *) palloc(sizeof(BOX)); cur = D_TO_BOX_P(entryvec->vector[0].key); memcpy((void *)pageunion, (void *)cur, sizeof(BOX)); for (i = 1; i < numranges; i++) { cur = D_TO_BOX_P(entryvec->vector[i].key); adjustBox(pageunion, cur); } *sizep = sizeof(BOX); RET_POINTER(pageunion); }
/* * The GiST Union method for boxes * * returns the minimal bounding box that encloses all the entries in entryvec */ Datum gist_box_union(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); int *sizep = (int *) PG_GETARG_POINTER(1); int numranges, i; BOX *cur, *pageunion; numranges = entryvec->n; pageunion = (BOX *) palloc(sizeof(BOX)); cur = DatumGetBoxP(entryvec->vector[0].key); memcpy((void *) pageunion, (void *) cur, sizeof(BOX)); for (i = 1; i < numranges; i++) { cur = DatumGetBoxP(entryvec->vector[i].key); adjustBox(pageunion, cur); } *sizep = sizeof(BOX); PG_RETURN_POINTER(pageunion); }
/* * The GiST PickSplit method * * New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree', * C.H.Ang and T.C.Tan * * This is used for both boxes and points. */ Datum gist_box_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i; OffsetNumber *listL, *listR, *listB, *listT; BOX *unionL, *unionR, *unionB, *unionT; int posL, posR, posB, posT; BOX pageunion; BOX *cur; char direction = ' '; bool allisequal = true; OffsetNumber maxoff; int nbytes; posL = posR = posB = posT = 0; maxoff = entryvec->n - 1; cur = DatumGetBoxP(entryvec->vector[FirstOffsetNumber].key); memcpy((void *) &pageunion, (void *) cur, sizeof(BOX)); /* find MBR */ for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); if (allisequal && ( pageunion.high.x != cur->high.x || pageunion.high.y != cur->high.y || pageunion.low.x != cur->low.x || pageunion.low.y != cur->low.y )) allisequal = false; adjustBox(&pageunion, cur); } if (allisequal) { /* * All entries are the same */ fallbackSplit(entryvec, v); PG_RETURN_POINTER(v); } nbytes = (maxoff + 2) * sizeof(OffsetNumber); listL = (OffsetNumber *) palloc(nbytes); listR = (OffsetNumber *) palloc(nbytes); listB = (OffsetNumber *) palloc(nbytes); listT = (OffsetNumber *) palloc(nbytes); unionL = (BOX *) palloc(sizeof(BOX)); unionR = (BOX *) palloc(sizeof(BOX)); unionB = (BOX *) palloc(sizeof(BOX)); unionT = (BOX *) palloc(sizeof(BOX)); #define ADDLIST( list, unionD, pos, num ) do { \ if ( pos ) { \ if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \ if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \ if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \ if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \ } else { \ memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \ } \ (list)[pos] = num; \ (pos)++; \ } while(0) for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, i); else ADDLIST(listR, unionR, posR, i); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, i); else ADDLIST(listT, unionT, posT, i); } #define LIMIT_RATIO 0.1 #define _IS_BADRATIO(x,y) ( (y) == 0 || (float)(x)/(float)(y) < LIMIT_RATIO ) #define IS_BADRATIO(x,y) ( _IS_BADRATIO((x),(y)) || _IS_BADRATIO((y),(x)) ) /* bad disposition, try to split by centers of boxes */ if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { double avgCenterX = 0.0, avgCenterY = 0.0; double CenterX, CenterY; for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); avgCenterX += ((double) cur->high.x + (double) cur->low.x) / 2.0; avgCenterY += ((double) cur->high.y + (double) cur->low.y) / 2.0; } avgCenterX /= maxoff; avgCenterY /= maxoff; posL = posR = posB = posT = 0; for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); CenterX = ((double) cur->high.x + (double) cur->low.x) / 2.0; CenterY = ((double) cur->high.y + (double) cur->low.y) / 2.0; if (CenterX < avgCenterX) ADDLIST(listL, unionL, posL, i); else if (CenterX == avgCenterX) { if (posL > posR) ADDLIST(listR, unionR, posR, i); else ADDLIST(listL, unionL, posL, i); } else ADDLIST(listR, unionR, posR, i); if (CenterY < avgCenterY) ADDLIST(listB, unionB, posB, i); else if (CenterY == avgCenterY) { if (posB > posT) ADDLIST(listT, unionT, posT, i); else ADDLIST(listB, unionB, posB, i); } else ADDLIST(listT, unionT, posT, i); } if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { fallbackSplit(entryvec, v); PG_RETURN_POINTER(v); } } /* which split more optimal? */ if (Max(posL, posR) < Max(posB, posT)) direction = 'x'; else if (Max(posL, posR) > Max(posB, posT)) direction = 'y'; else { Datum interLR = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionL), BoxPGetDatum(unionR)); Datum interBT = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionB), BoxPGetDatum(unionT)); double sizeLR, sizeBT; sizeLR = size_box(interLR); sizeBT = size_box(interBT); if (sizeLR < sizeBT) direction = 'x'; else direction = 'y'; } if (direction == 'x') chooseLR(v, listL, posL, unionL, listR, posR, unionR); else chooseLR(v, listB, posB, unionB, listT, posT, unionT); PG_RETURN_POINTER(v); }
static void chooseLR(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, BOX *union1, OffsetNumber *list2, int nlist2, BOX *union2) { bool firstToLeft = true; if (v->spl_ldatum_exists || v->spl_rdatum_exists) { if (v->spl_ldatum_exists && v->spl_rdatum_exists) { BOX LRl = *union1, LRr = *union2; BOX RLl = *union2, RLr = *union1; double sizeLR, sizeRL; adjustBox(&LRl, DatumGetBoxP(v->spl_ldatum)); adjustBox(&LRr, DatumGetBoxP(v->spl_rdatum)); adjustBox(&RLl, DatumGetBoxP(v->spl_ldatum)); adjustBox(&RLr, DatumGetBoxP(v->spl_rdatum)); sizeLR = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&LRl), BoxPGetDatum(&LRr))); sizeRL = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&RLl), BoxPGetDatum(&RLr))); if (sizeLR > sizeRL) firstToLeft = false; } else { float p1, p2; GISTENTRY oldUnion, addon; gistentryinit(oldUnion, (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum, NULL, NULL, InvalidOffsetNumber, FALSE); gistentryinit(addon, BoxPGetDatum(union1), NULL, NULL, InvalidOffsetNumber, FALSE); DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&addon), PointerGetDatum(&p1)); gistentryinit(addon, BoxPGetDatum(union2), NULL, NULL, InvalidOffsetNumber, FALSE); DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&addon), PointerGetDatum(&p2)); if ((v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2)) firstToLeft = false; } } if (firstToLeft) { v->spl_left = list1; v->spl_right = list2; v->spl_nleft = nlist1; v->spl_nright = nlist2; if (v->spl_ldatum_exists) adjustBox(union1, DatumGetBoxP(v->spl_ldatum)); v->spl_ldatum = BoxPGetDatum(union1); if (v->spl_rdatum_exists) adjustBox(union2, DatumGetBoxP(v->spl_rdatum)); v->spl_rdatum = BoxPGetDatum(union2); } else { v->spl_left = list2; v->spl_right = list1; v->spl_nleft = nlist2; v->spl_nright = nlist1; if (v->spl_ldatum_exists) adjustBox(union2, DatumGetBoxP(v->spl_ldatum)); v->spl_ldatum = BoxPGetDatum(union2); if (v->spl_rdatum_exists) adjustBox(union1, DatumGetBoxP(v->spl_rdatum)); v->spl_rdatum = BoxPGetDatum(union1); } v->spl_ldatum_exists = v->spl_rdatum_exists = false; }
/* * The GiST PickSplit method * * New linear algorithm, see 'New Linear node_n Splitting Algorithm for R-tree', * C.H.Ang and T.C.Tan * * This is used for both boxes and points. */ datum_t gist_box_picksplit(PG_FUNC_ARGS) { struct gist_entry_vector *entryvec = (struct gist_entry_vector *)ARG_POINTER(0); struct gist_splitvec *v = (struct gist_splitvec *)ARG_POINTER(1); item_id_t i; item_id_t *listL; item_id_t *listR; item_id_t *listB; item_id_t *listT; BOX *unionL; BOX *unionR; BOX *unionB; BOX *unionT; int posL, posR, posB, posT; BOX pageunion; BOX *cur; char direction = ' '; bool allisequal = true; item_id_t maxoff; int nbytes; posL = posR = posB = posT = 0; maxoff = entryvec->n - 1; cur = D_TO_BOX_P(entryvec->vector[FIRST_ITEM_ID].key); memcpy((void*) &pageunion, (void*) cur, sizeof(BOX)); /* find MBR */ for (i = ITEM_ID_NEXT(FIRST_ITEM_ID); i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); if (allisequal && (pageunion.high.x != cur->high.x || pageunion.high.y != cur->high.y || pageunion.low.x != cur->low.x || pageunion.low.y != cur->low.y)) allisequal = false; adjustBox(&pageunion, cur); } if (allisequal) { /* * All entries are the same */ fallbackSplit(entryvec, v); RET_POINTER(v); } nbytes = (maxoff + 2) * sizeof(item_id_t); listL = (item_id_t *) palloc(nbytes); listR = (item_id_t *) palloc(nbytes); listB = (item_id_t *) palloc(nbytes); listT = (item_id_t *) palloc(nbytes); unionL = (BOX *) palloc(sizeof(BOX)); unionR = (BOX *) palloc(sizeof(BOX)); unionB = (BOX *) palloc(sizeof(BOX)); unionT = (BOX *) palloc(sizeof(BOX)); #define ADDLIST( list, unionD, pos, num ) do { \ if ( pos ) { \ if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \ if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \ if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \ if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \ } else { \ memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \ } \ \ (list)[pos] = num; \ (pos)++; \ } while(0) for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, i); else ADDLIST(listR, unionR, posR, i); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, i); else ADDLIST(listT, unionT, posT, i); } #define LIMIT_RATIO 0.1 #define _IS_BADRATIO(x,y) ( (y) == 0 || (float)(x)/(float)(y) < LIMIT_RATIO ) #define IS_BADRATIO(x,y) ( _IS_BADRATIO((x),(y)) || _IS_BADRATIO((y),(x)) ) /* bad disposition, try to split by centers of boxes */ if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { double avgCenterX = 0.0; double avgCenterY = 0.0; double CenterX; double CenterY; for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); avgCenterX += ((double)cur->high.x + (double)cur->low.x) / 2.0; avgCenterY += ((double)cur->high.y + (double)cur->low.y) / 2.0; } avgCenterX /= maxoff; avgCenterY /= maxoff; posL = posR = posB = posT = 0; for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); CenterX = ((double)cur->high.x + (double)cur->low.x) / 2.0; CenterY = ((double)cur->high.y + (double)cur->low.y) / 2.0; if (CenterX < avgCenterX) ADDLIST(listL, unionL, posL, i); else if (CenterX == avgCenterX) { if (posL > posR) ADDLIST(listR, unionR, posR, i); else ADDLIST(listL, unionL, posL, i); } else ADDLIST(listR, unionR, posR, i); if (CenterY < avgCenterY) ADDLIST(listB, unionB, posB, i); else if (CenterY == avgCenterY) { if (posB > posT) ADDLIST(listT, unionT, posT, i); else ADDLIST(listB, unionB, posB, i); } else ADDLIST(listT, unionT, posT, i); } if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { fallbackSplit(entryvec, v); RET_POINTER(v); } } /* which split more optimal? */ if (Max(posL, posR) < Max(posB, posT)) direction = 'x'; else if (Max(posL, posR) > Max(posB, posT)) direction = 'y'; else { datum_t interLR = DIRECT_FC2(rt_box_inter, BOX_P_TO_D(unionL), BOX_P_TO_D(unionR)); datum_t interBT = DIRECT_FC2(rt_box_inter, BOX_P_TO_D(unionB), BOX_P_TO_D(unionT)); double sizeLR; double sizeBT; sizeLR = size_box(interLR); sizeBT = size_box(interBT); if (sizeLR < sizeBT) direction = 'x'; else direction = 'y'; } if (direction == 'x') chooseLR(v, listL, posL, unionL, listR, posR, unionR); else chooseLR(v, listB, posB, unionB, listT, posT, unionT); RET_POINTER(v); }
static void chooseLR( struct gist_splitvec *v, item_id_t* list1, int nlist1, BOX* union1, item_id_t* list2, int nlist2, BOX* union2) { bool firstToLeft = true; if (v->spl_ldatum_exists || v->spl_rdatum_exists) { if (v->spl_ldatum_exists && v->spl_rdatum_exists) { BOX LRl = *union1; BOX LRr = *union2; BOX RLl = *union2; BOX RLr = *union1; double sizeLR; double sizeRL; adjustBox(&LRl, D_TO_BOX_P(v->spl_ldatum)); adjustBox(&LRr, D_TO_BOX_P(v->spl_rdatum)); adjustBox(&RLl, D_TO_BOX_P(v->spl_ldatum)); adjustBox(&RLr, D_TO_BOX_P(v->spl_rdatum)); sizeLR = size_box(DIRECT_FC2( rt_box_inter, BOX_P_TO_D(&LRl), BOX_P_TO_D(&LRr))); sizeRL = size_box(DIRECT_FC2( rt_box_inter, BOX_P_TO_D(&RLl), BOX_P_TO_D(&RLr))); if (sizeLR > sizeRL) firstToLeft = false; } else { float p1; float p2; struct gist_entry oldUnion; struct gist_entry addon; gistentryinit( oldUnion, (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum, NULL, NULL, INVALID_ITEM_ID, FALSE); gistentryinit(addon, BOX_P_TO_D(union1), NULL, NULL, INVALID_ITEM_ID, FALSE); DIRECT_FC3(gist_box_penalty, PTR_TO_D(&oldUnion), PTR_TO_D(&addon), PTR_TO_D(&p1)); gistentryinit(addon, BOX_P_TO_D(union2), NULL, NULL, INVALID_ITEM_ID, FALSE); DIRECT_FC3( gist_box_penalty, PTR_TO_D(&oldUnion), PTR_TO_D(&addon), PTR_TO_D(&p2)); if ((v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2)) firstToLeft = false; } } if (firstToLeft) { v->spl_left = list1; v->spl_right = list2; v->spl_nleft = nlist1; v->spl_nright = nlist2; if (v->spl_ldatum_exists) adjustBox(union1, D_TO_BOX_P(v->spl_ldatum)); v->spl_ldatum = BOX_P_TO_D(union1); if (v->spl_rdatum_exists) adjustBox(union2, D_TO_BOX_P(v->spl_rdatum)); v->spl_rdatum = BOX_P_TO_D(union2); } else { v->spl_left = list2; v->spl_right = list1; v->spl_nleft = nlist2; v->spl_nright = nlist1; if (v->spl_ldatum_exists) adjustBox(union2, D_TO_BOX_P(v->spl_ldatum)); v->spl_ldatum = BOX_P_TO_D(union2); if (v->spl_rdatum_exists) adjustBox(union1, D_TO_BOX_P(v->spl_rdatum)); v->spl_rdatum = BOX_P_TO_D(union1); } v->spl_ldatum_exists = v->spl_rdatum_exists = false; }
/* * The GiST PickSplit method * * New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree', * C.H.Ang and T.C.Tan */ Datum gist_box_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i; OffsetNumber *listL, *listR, *listB, *listT; BOX *unionL, *unionR, *unionB, *unionT; int posL, posR, posB, posT; BOX pageunion; BOX *cur; char direction = ' '; bool allisequal = true; OffsetNumber maxoff; int nbytes; posL = posR = posB = posT = 0; maxoff = entryvec->n - 1; cur = DatumGetBoxP(entryvec->vector[FirstOffsetNumber].key); memcpy((void *) &pageunion, (void *) cur, sizeof(BOX)); /* find MBR */ for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); if (allisequal == true && ( pageunion.high.x != cur->high.x || pageunion.high.y != cur->high.y || pageunion.low.x != cur->low.x || pageunion.low.y != cur->low.y )) allisequal = false; adjustBox(&pageunion, cur); } nbytes = (maxoff + 2) * sizeof(OffsetNumber); listL = (OffsetNumber *) palloc(nbytes); listR = (OffsetNumber *) palloc(nbytes); unionL = (BOX *) palloc(sizeof(BOX)); unionR = (BOX *) palloc(sizeof(BOX)); if (allisequal) { cur = DatumGetBoxP(entryvec->vector[OffsetNumberNext(FirstOffsetNumber)].key); if (memcmp((void *) cur, (void *) &pageunion, sizeof(BOX)) == 0) { v->spl_left = listL; v->spl_right = listR; v->spl_nleft = v->spl_nright = 0; memcpy((void *) unionL, (void *) &pageunion, sizeof(BOX)); memcpy((void *) unionR, (void *) &pageunion, sizeof(BOX)); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { if (i <= (maxoff - FirstOffsetNumber + 1) / 2) { v->spl_left[v->spl_nleft] = i; v->spl_nleft++; } else { v->spl_right[v->spl_nright] = i; v->spl_nright++; } } if (v->spl_ldatum_exists) adjustBox(unionL, DatumGetBoxP(v->spl_ldatum)); v->spl_ldatum = BoxPGetDatum(unionL); if (v->spl_rdatum_exists) adjustBox(unionR, DatumGetBoxP(v->spl_rdatum)); v->spl_rdatum = BoxPGetDatum(unionR); v->spl_ldatum_exists = v->spl_rdatum_exists = false; PG_RETURN_POINTER(v); } } listB = (OffsetNumber *) palloc(nbytes); listT = (OffsetNumber *) palloc(nbytes); unionB = (BOX *) palloc(sizeof(BOX)); unionT = (BOX *) palloc(sizeof(BOX)); #define ADDLIST( list, unionD, pos, num ) do { \ if ( pos ) { \ if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \ if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \ if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \ if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \ } else { \ memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \ } \ (list)[pos] = num; \ (pos)++; \ } while(0) for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, i); else ADDLIST(listR, unionR, posR, i); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, i); else ADDLIST(listT, unionT, posT, i); } /* bad disposition, sort by ascending and resplit */ if ((posR == 0 || posL == 0) && (posT == 0 || posB == 0)) { KBsort *arr = (KBsort *) palloc(sizeof(KBsort) * maxoff); posL = posR = posB = posT = 0; for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { arr[i - 1].key = DatumGetBoxP(entryvec->vector[i].key); arr[i - 1].pos = i; } qsort(arr, maxoff, sizeof(KBsort), compare_KB); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = arr[i - 1].key; if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, arr[i - 1].pos); else if (cur->low.x - pageunion.low.x == pageunion.high.x - cur->high.x) { if (posL > posR) ADDLIST(listR, unionR, posR, arr[i - 1].pos); else ADDLIST(listL, unionL, posL, arr[i - 1].pos); } else ADDLIST(listR, unionR, posR, arr[i - 1].pos); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, arr[i - 1].pos); else if (cur->low.y - pageunion.low.y == pageunion.high.y - cur->high.y) { if (posB > posT) ADDLIST(listT, unionT, posT, arr[i - 1].pos); else ADDLIST(listB, unionB, posB, arr[i - 1].pos); } else ADDLIST(listT, unionT, posT, arr[i - 1].pos); } } /* which split more optimal? */ if (Max(posL, posR) < Max(posB, posT)) direction = 'x'; else if (Max(posL, posR) > Max(posB, posT)) direction = 'y'; else { Datum interLR = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionL), BoxPGetDatum(unionR)); Datum interBT = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionB), BoxPGetDatum(unionT)); double sizeLR, sizeBT; sizeLR = size_box(interLR); sizeBT = size_box(interBT); if (sizeLR < sizeBT) direction = 'x'; else direction = 'y'; } if (direction == 'x') chooseLR(v, listL, posL, unionL, listR, posR, unionR); else chooseLR(v, listB, posB, unionB, listT, posT, unionT); PG_RETURN_POINTER(v); }
/* * -------------------------------------------------------------------------- * Double sorting split algorithm. This is used for both boxes and points. * * The algorithm finds split of boxes by considering splits along each axis. * Each entry is first projected as an interval on the X-axis, and different * ways to split the intervals into two groups are considered, trying to * minimize the overlap of the groups. Then the same is repeated for the * Y-axis, and the overall best split is chosen. The quality of a split is * determined by overlap along that axis and some other criteria (see * g_box_consider_split). * * After that, all the entries are divided into three groups: * * 1) Entries which should be placed to the left group * 2) Entries which should be placed to the right group * 3) "Common entries" which can be placed to any of groups without affecting * of overlap along selected axis. * * The common entries are distributed by minimizing penalty. * * For details see: * "A new___ double sorting-based node splitting algorithm for R-tree", A. Korotkov * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36 * -------------------------------------------------------------------------- */ Datum gist_box_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i, maxoff; ConsiderSplitContext context; BOX *box, *leftBox, *rightBox; int dim, commonEntriesCount; SplitInterval *intervalsLower, *intervalsUpper; CommonEntry *commonEntries; int nentries; memset(&context, 0, sizeof(ConsiderSplitContext)); maxoff = entryvec->n - 1; nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1; /* Allocate arrays for intervals along axes */ intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval)); intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval)); /* * Calculate the overall minimum bounding box over all the entries. */ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { box = DatumGetBoxP(entryvec->vector[i].key); if (i == FirstOffsetNumber) context.boundingBox = *box; else adjustBox(&context.boundingBox, box); } /* * Iterate over axes for optimal split searching. */ context.first = true; /* nothing selected yet */ for (dim = 0; dim < 2; dim++) { double leftUpper, rightLower; int i1, i2; /* Project each entry as an interval on the selected axis. */ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { box = DatumGetBoxP(entryvec->vector[i].key); if (dim == 0) { intervalsLower[i - FirstOffsetNumber].lower = box->low.x; intervalsLower[i - FirstOffsetNumber].upper = box->high.x; } else { intervalsLower[i - FirstOffsetNumber].lower = box->low.y; intervalsLower[i - FirstOffsetNumber].upper = box->high.y; } } /* * Make two arrays of intervals: one sorted by lower bound and another * sorted by upper bound. */ memcpy(intervalsUpper, intervalsLower, sizeof(SplitInterval) * nentries); qsort(intervalsLower, nentries, sizeof(SplitInterval), interval_cmp_lower); qsort(intervalsUpper, nentries, sizeof(SplitInterval), interval_cmp_upper); /*---- * The goal is to form a left and right interval, so that every entry * interval is contained by either left or right interval (or both). * * For example, with the intervals (0,1), (1,3), (2,3), (2,4): * * 0 1 2 3 4 * +-+ * +---+ * +-+ * +---+ * * The left and right intervals are of the form (0,a) and (b,4). * We first consider splits where b is the lower bound of an entry. * We iterate through all entries, and for each b, calculate the * smallest possible a. Then we consider splits where a is the * uppper bound of an entry, and for each a, calculate the greatest * possible b. * * In the above example, the first loop would consider splits: * b=0: (0,1)-(0,4) * b=1: (0,1)-(1,4) * b=2: (0,3)-(2,4) * * And the second loop: * a=1: (0,1)-(1,4) * a=3: (0,3)-(2,4) * a=4: (0,4)-(2,4) */ /* * Iterate over lower bound of right group, finding smallest possible * upper bound of left group. */ i1 = 0; i2 = 0; rightLower = intervalsLower[i1].lower; leftUpper = intervalsUpper[i2].lower; while (true) { /* * Find next lower bound of right group. */ while (i1 < nentries && rightLower == intervalsLower[i1].lower) { leftUpper = Max(leftUpper, intervalsLower[i1].upper); i1++; } if (i1 >= nentries) break; rightLower = intervalsLower[i1].lower; /* * Find count of intervals which anyway should be placed to the * left group. */ while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper) i2++; /* * Consider found split. */ g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2); } /* * Iterate over upper bound of left group finding greates possible * lower bound of right group. */ i1 = nentries - 1; i2 = nentries - 1; rightLower = intervalsLower[i1].upper; leftUpper = intervalsUpper[i2].upper; while (true) { /* * Find next upper bound of left group. */ while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper) { rightLower = Min(rightLower, intervalsUpper[i2].lower); i2--; } if (i2 < 0) break; leftUpper = intervalsUpper[i2].upper; /* * Find count of intervals which anyway should be placed to the * right group. */ while (i1 >= 0 && intervalsLower[i1].lower >= rightLower) i1--; /* * Consider found split. */ g_box_consider_split(&context, dim, rightLower, i1 + 1, leftUpper, i2 + 1); } } /* * If we failed to find any acceptable splits, use trivial split. */ if (context.first) { fallbackSplit(entryvec, v); PG_RETURN_POINTER(v); } /* * Ok, we have now selected the split across one axis. * * While considering the splits, we already determined that there will be * enough entries in both groups to reach the desired ratio, but we did * not memorize which entries go to which group. So determine that now. */ /* Allocate vectors for results */ v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber)); v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber)); v->spl_nleft = 0; v->spl_nright = 0; /* Allocate bounding boxes of left and right groups */ leftBox = static_cast<BOX *>(palloc0(sizeof(BOX))); rightBox = static_cast<BOX *>(palloc0(sizeof(BOX))); /* * Allocate an array for "common entries" - entries which can be placed to * either group without affecting overlap along selected axis. */ commonEntriesCount = 0; commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry)); /* Helper macros to place an entry in the left or right group */ #define PLACE_LEFT(box, off) \ do { \ if (v->spl_nleft > 0) \ adjustBox(leftBox, box); \ else \ *leftBox = *(box); \ v->spl_left[v->spl_nleft++] = off; \ } while(0) #define PLACE_RIGHT(box, off) \ do { \ if (v->spl_nright > 0) \ adjustBox(rightBox, box); \ else \ *rightBox = *(box); \ v->spl_right[v->spl_nright++] = off; \ } while(0) /* * Distribute entries which can be distributed unambiguously, and collect * common entries. */ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { double lower, upper; /* * Get upper and lower bounds along selected axis. */ box = DatumGetBoxP(entryvec->vector[i].key); if (context.dim == 0) { lower = box->low.x; upper = box->high.x; } else { lower = box->low.y; upper = box->high.y; } if (upper <= context.leftUpper) { /* Fits to the left group */ if (lower >= context.rightLower) { /* Fits also to the right group, so "common entry" */ commonEntries[commonEntriesCount++].index = i; } else { /* Doesn't fit to the right group, so join to the left group */ PLACE_LEFT(box, i); } } else { /* * Each entry should fit on either left or right group. Since this * entry didn't fit on the left group, it better fit in the right * group. */ Assert(lower >= context.rightLower); /* Doesn't fit to the left group, so join to the right group */ PLACE_RIGHT(box, i); } } /* * Distribute "common entries", if any. */ if (commonEntriesCount > 0) { /* * Calculate minimum number of entries that must be placed in both * groups, to reach LIMIT_RATIO. */ int m = ceil(LIMIT_RATIO * (double) nentries); /* * Calculate delta between penalties of join "common entries" to * different groups. */ for (i = 0; i < commonEntriesCount; i++) { box = DatumGetBoxP(entryvec->vector[commonEntries[i].index].key); commonEntries[i].delta = Abs(box_penalty(leftBox, box) - box_penalty(rightBox, box)); } /* * Sort "common entries" by calculated deltas in order to distribute * the most ambiguous entries first. */ qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp); /* * Distribute "common entries" between groups. */ for (i = 0; i < commonEntriesCount; i++) { box = DatumGetBoxP(entryvec->vector[commonEntries[i].index].key); /* * Check if we have to place this entry in either group to achieve * LIMIT_RATIO. */ if (v->spl_nleft + (commonEntriesCount - i) <= m) PLACE_LEFT(box, commonEntries[i].index); else if (v->spl_nright + (commonEntriesCount - i) <= m) PLACE_RIGHT(box, commonEntries[i].index); else { /* Otherwise select the group by minimal penalty */ if (box_penalty(leftBox, box) < box_penalty(rightBox, box)) PLACE_LEFT(box, commonEntries[i].index); else PLACE_RIGHT(box, commonEntries[i].index); } } } v->spl_ldatum = PointerGetDatum(leftBox); v->spl_rdatum = PointerGetDatum(rightBox); PG_RETURN_POINTER(v); }