/* * The GiST PickSplit method * * New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree', * C.H.Ang and T.C.Tan * * This is used for both boxes and points. */ Datum gist_box_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i; OffsetNumber *listL, *listR, *listB, *listT; BOX *unionL, *unionR, *unionB, *unionT; int posL, posR, posB, posT; BOX pageunion; BOX *cur; char direction = ' '; bool allisequal = true; OffsetNumber maxoff; int nbytes; posL = posR = posB = posT = 0; maxoff = entryvec->n - 1; cur = DatumGetBoxP(entryvec->vector[FirstOffsetNumber].key); memcpy((void *) &pageunion, (void *) cur, sizeof(BOX)); /* find MBR */ for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); if (allisequal && ( pageunion.high.x != cur->high.x || pageunion.high.y != cur->high.y || pageunion.low.x != cur->low.x || pageunion.low.y != cur->low.y )) allisequal = false; adjustBox(&pageunion, cur); } if (allisequal) { /* * All entries are the same */ fallbackSplit(entryvec, v); PG_RETURN_POINTER(v); } nbytes = (maxoff + 2) * sizeof(OffsetNumber); listL = (OffsetNumber *) palloc(nbytes); listR = (OffsetNumber *) palloc(nbytes); listB = (OffsetNumber *) palloc(nbytes); listT = (OffsetNumber *) palloc(nbytes); unionL = (BOX *) palloc(sizeof(BOX)); unionR = (BOX *) palloc(sizeof(BOX)); unionB = (BOX *) palloc(sizeof(BOX)); unionT = (BOX *) palloc(sizeof(BOX)); #define ADDLIST( list, unionD, pos, num ) do { \ if ( pos ) { \ if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \ if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \ if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \ if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \ } else { \ memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \ } \ (list)[pos] = num; \ (pos)++; \ } while(0) for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, i); else ADDLIST(listR, unionR, posR, i); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, i); else ADDLIST(listT, unionT, posT, i); } #define LIMIT_RATIO 0.1 #define _IS_BADRATIO(x,y) ( (y) == 0 || (float)(x)/(float)(y) < LIMIT_RATIO ) #define IS_BADRATIO(x,y) ( _IS_BADRATIO((x),(y)) || _IS_BADRATIO((y),(x)) ) /* bad disposition, try to split by centers of boxes */ if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { double avgCenterX = 0.0, avgCenterY = 0.0; double CenterX, CenterY; for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); avgCenterX += ((double) cur->high.x + (double) cur->low.x) / 2.0; avgCenterY += ((double) cur->high.y + (double) cur->low.y) / 2.0; } avgCenterX /= maxoff; avgCenterY /= maxoff; posL = posR = posB = posT = 0; for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(entryvec->vector[i].key); CenterX = ((double) cur->high.x + (double) cur->low.x) / 2.0; CenterY = ((double) cur->high.y + (double) cur->low.y) / 2.0; if (CenterX < avgCenterX) ADDLIST(listL, unionL, posL, i); else if (CenterX == avgCenterX) { if (posL > posR) ADDLIST(listR, unionR, posR, i); else ADDLIST(listL, unionL, posL, i); } else ADDLIST(listR, unionR, posR, i); if (CenterY < avgCenterY) ADDLIST(listB, unionB, posB, i); else if (CenterY == avgCenterY) { if (posB > posT) ADDLIST(listT, unionT, posT, i); else ADDLIST(listB, unionB, posB, i); } else ADDLIST(listT, unionT, posT, i); } if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { fallbackSplit(entryvec, v); PG_RETURN_POINTER(v); } } /* which split more optimal? */ if (Max(posL, posR) < Max(posB, posT)) direction = 'x'; else if (Max(posL, posR) > Max(posB, posT)) direction = 'y'; else { Datum interLR = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionL), BoxPGetDatum(unionR)); Datum interBT = DirectFunctionCall2(rt_box_inter, BoxPGetDatum(unionB), BoxPGetDatum(unionT)); double sizeLR, sizeBT; sizeLR = size_box(interLR); sizeBT = size_box(interBT); if (sizeLR < sizeBT) direction = 'x'; else direction = 'y'; } if (direction == 'x') chooseLR(v, listL, posL, unionL, listR, posR, unionR); else chooseLR(v, listB, posB, unionB, listT, posT, unionT); PG_RETURN_POINTER(v); }
/* * The GiST PickSplit method * * New linear algorithm, see 'New Linear node_n Splitting Algorithm for R-tree', * C.H.Ang and T.C.Tan * * This is used for both boxes and points. */ datum_t gist_box_picksplit(PG_FUNC_ARGS) { struct gist_entry_vector *entryvec = (struct gist_entry_vector *)ARG_POINTER(0); struct gist_splitvec *v = (struct gist_splitvec *)ARG_POINTER(1); item_id_t i; item_id_t *listL; item_id_t *listR; item_id_t *listB; item_id_t *listT; BOX *unionL; BOX *unionR; BOX *unionB; BOX *unionT; int posL, posR, posB, posT; BOX pageunion; BOX *cur; char direction = ' '; bool allisequal = true; item_id_t maxoff; int nbytes; posL = posR = posB = posT = 0; maxoff = entryvec->n - 1; cur = D_TO_BOX_P(entryvec->vector[FIRST_ITEM_ID].key); memcpy((void*) &pageunion, (void*) cur, sizeof(BOX)); /* find MBR */ for (i = ITEM_ID_NEXT(FIRST_ITEM_ID); i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); if (allisequal && (pageunion.high.x != cur->high.x || pageunion.high.y != cur->high.y || pageunion.low.x != cur->low.x || pageunion.low.y != cur->low.y)) allisequal = false; adjustBox(&pageunion, cur); } if (allisequal) { /* * All entries are the same */ fallbackSplit(entryvec, v); RET_POINTER(v); } nbytes = (maxoff + 2) * sizeof(item_id_t); listL = (item_id_t *) palloc(nbytes); listR = (item_id_t *) palloc(nbytes); listB = (item_id_t *) palloc(nbytes); listT = (item_id_t *) palloc(nbytes); unionL = (BOX *) palloc(sizeof(BOX)); unionR = (BOX *) palloc(sizeof(BOX)); unionB = (BOX *) palloc(sizeof(BOX)); unionT = (BOX *) palloc(sizeof(BOX)); #define ADDLIST( list, unionD, pos, num ) do { \ if ( pos ) { \ if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \ if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \ if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \ if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \ } else { \ memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \ } \ \ (list)[pos] = num; \ (pos)++; \ } while(0) for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) ADDLIST(listL, unionL, posL, i); else ADDLIST(listR, unionR, posR, i); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) ADDLIST(listB, unionB, posB, i); else ADDLIST(listT, unionT, posT, i); } #define LIMIT_RATIO 0.1 #define _IS_BADRATIO(x,y) ( (y) == 0 || (float)(x)/(float)(y) < LIMIT_RATIO ) #define IS_BADRATIO(x,y) ( _IS_BADRATIO((x),(y)) || _IS_BADRATIO((y),(x)) ) /* bad disposition, try to split by centers of boxes */ if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { double avgCenterX = 0.0; double avgCenterY = 0.0; double CenterX; double CenterY; for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); avgCenterX += ((double)cur->high.x + (double)cur->low.x) / 2.0; avgCenterY += ((double)cur->high.y + (double)cur->low.y) / 2.0; } avgCenterX /= maxoff; avgCenterY /= maxoff; posL = posR = posB = posT = 0; for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) { cur = D_TO_BOX_P(entryvec->vector[i].key); CenterX = ((double)cur->high.x + (double)cur->low.x) / 2.0; CenterY = ((double)cur->high.y + (double)cur->low.y) / 2.0; if (CenterX < avgCenterX) ADDLIST(listL, unionL, posL, i); else if (CenterX == avgCenterX) { if (posL > posR) ADDLIST(listR, unionR, posR, i); else ADDLIST(listL, unionL, posL, i); } else ADDLIST(listR, unionR, posR, i); if (CenterY < avgCenterY) ADDLIST(listB, unionB, posB, i); else if (CenterY == avgCenterY) { if (posB > posT) ADDLIST(listT, unionT, posT, i); else ADDLIST(listB, unionB, posB, i); } else ADDLIST(listT, unionT, posT, i); } if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) { fallbackSplit(entryvec, v); RET_POINTER(v); } } /* which split more optimal? */ if (Max(posL, posR) < Max(posB, posT)) direction = 'x'; else if (Max(posL, posR) > Max(posB, posT)) direction = 'y'; else { datum_t interLR = DIRECT_FC2(rt_box_inter, BOX_P_TO_D(unionL), BOX_P_TO_D(unionR)); datum_t interBT = DIRECT_FC2(rt_box_inter, BOX_P_TO_D(unionB), BOX_P_TO_D(unionT)); double sizeLR; double sizeBT; sizeLR = size_box(interLR); sizeBT = size_box(interBT); if (sizeLR < sizeBT) direction = 'x'; else direction = 'y'; } if (direction == 'x') chooseLR(v, listL, posL, unionL, listR, posR, unionR); else chooseLR(v, listB, posB, unionB, listT, posT, unionT); RET_POINTER(v); }