Datum gtsquery_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber maxoff = entryvec->n - 2; OffsetNumber k, j; TSQuerySign *datum_l, *datum_r; int4 size_alpha, size_beta; int4 size_waste, waste = -1; int4 nbytes; OffsetNumber seed_1 = 0, seed_2 = 0; OffsetNumber *left, *right; SPLITCOST *costvector; nbytes = (maxoff + 2) * sizeof(OffsetNumber); left = v->spl_left = (OffsetNumber *) palloc(nbytes); right = v->spl_right = (OffsetNumber *) palloc(nbytes); v->spl_nleft = v->spl_nright = 0; for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) { size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k)); if (size_waste > waste) { waste = size_waste; seed_1 = k; seed_2 = j; } } if (seed_1 == 0 || seed_2 == 0) { seed_1 = 1; seed_2 = 2; } datum_l = (TSQuerySign *) palloc(sizeof(TSQuerySign)); *datum_l = *GETENTRY(entryvec, seed_1); datum_r = (TSQuerySign *) palloc(sizeof(TSQuerySign)); *datum_r = *GETENTRY(entryvec, seed_2); maxoff = OffsetNumberNext(maxoff); costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) { costvector[j - 1].pos = j; size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j)); size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j)); costvector[j - 1].cost = abs(size_alpha - size_beta); } qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); for (k = 0; k < maxoff; k++) { j = costvector[k].pos; if (j == seed_1) { *left++ = j; v->spl_nleft++; continue; } else if (j == seed_2) { *right++ = j; v->spl_nright++; continue; } size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j)); size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j)); if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) { *datum_l |= *GETENTRY(entryvec, j); *left++ = j; v->spl_nleft++; } else { *datum_r |= *GETENTRY(entryvec, j); *right++ = j; v->spl_nright++; } } *right = *left = FirstOffsetNumber; v->spl_ldatum = PointerGetDatum(datum_l); v->spl_rdatum = PointerGetDatum(datum_r); PG_RETURN_POINTER(v); }
/* ** The GiST PickSplit method for _intments ** We use Guttman's poly time split algorithm */ Datum g_int_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i, j; ArrayType *datum_alpha, *datum_beta; ArrayType *datum_l, *datum_r; ArrayType *union_d, *union_dl, *union_dr; ArrayType *inter_d; bool firsttime; float size_alpha, size_beta, size_union, size_inter; float size_waste, waste; float size_l, size_r; int nbytes; OffsetNumber seed_1 = 0, seed_2 = 0; OffsetNumber *left, *right; OffsetNumber maxoff; SPLITCOST *costvector; #ifdef GIST_DEBUG elog(DEBUG3, "--------picksplit %d", entryvec->n); #endif maxoff = entryvec->n - 2; nbytes = (maxoff + 2) * sizeof(OffsetNumber); v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes); firsttime = true; waste = 0.0; for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { datum_alpha = GETENTRY(entryvec, i); for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { datum_beta = GETENTRY(entryvec, j); /* compute the wasted space by unioning these guys */ /* size_waste = size_union - size_inter; */ union_d = inner_int_union(datum_alpha, datum_beta); rt__int_size(union_d, &size_union); inter_d = inner_int_inter(datum_alpha, datum_beta); rt__int_size(inter_d, &size_inter); size_waste = size_union - size_inter; pfree(union_d); if (inter_d != (ArrayType *) NULL) pfree(inter_d); /* * are these a more promising split that what we've already seen? */ if (size_waste > waste || firsttime) { waste = size_waste; seed_1 = i; seed_2 = j; firsttime = false; } } } left = v->spl_left; v->spl_nleft = 0; right = v->spl_right; v->spl_nright = 0; if (seed_1 == 0 || seed_2 == 0) { seed_1 = 1; seed_2 = 2; } datum_alpha = GETENTRY(entryvec, seed_1); datum_l = copy_intArrayType(datum_alpha); rt__int_size(datum_l, &size_l); datum_beta = GETENTRY(entryvec, seed_2); datum_r = copy_intArrayType(datum_beta); rt__int_size(datum_r, &size_r); maxoff = OffsetNumberNext(maxoff); /* * sort entries */ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { costvector[i - 1].pos = i; datum_alpha = GETENTRY(entryvec, i); union_d = inner_int_union(datum_l, datum_alpha); rt__int_size(union_d, &size_alpha); pfree(union_d); union_d = inner_int_union(datum_r, datum_alpha); rt__int_size(union_d, &size_beta); pfree(union_d); costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r)); } qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); /* * Now split up the regions between the two seeds. An important property * of this split algorithm is that the split vector v has the indices of * items to be split in order in its left and right vectors. We exploit * this property by doing a merge in the code that actually splits the * page. * * For efficiency, we also place the new index tuple in this loop. This is * handled at the very end, when we have placed all the existing tuples * and i == maxoff + 1. */ for (j = 0; j < maxoff; j++) { i = costvector[j].pos; /* * If we've already decided where to place this item, just put it on * the right list. Otherwise, we need to figure out which page needs * the least enlargement in order to store the item. */ if (i == seed_1) { *left++ = i; v->spl_nleft++; continue; } else if (i == seed_2) { *right++ = i; v->spl_nright++; continue; } /* okay, which page needs least enlargement? */ datum_alpha = GETENTRY(entryvec, i); union_dl = inner_int_union(datum_l, datum_alpha); union_dr = inner_int_union(datum_r, datum_alpha); rt__int_size(union_dl, &size_alpha); rt__int_size(union_dr, &size_beta); /* pick which page to add it to */ if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01)) { if (datum_l) pfree(datum_l); if (union_dr) pfree(union_dr); datum_l = union_dl; size_l = size_alpha; *left++ = i; v->spl_nleft++; } else { if (datum_r) pfree(datum_r); if (union_dl) pfree(union_dl); datum_r = union_dr; size_r = size_beta; *right++ = i; v->spl_nright++; } } pfree(costvector); *right = *left = FirstOffsetNumber; v->spl_ldatum = PointerGetDatum(datum_l); v->spl_rdatum = PointerGetDatum(datum_r); PG_RETURN_POINTER(v); }
Datum gmol_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber k, j; bytea *datum_l, *datum_r; int32 size_alpha, size_beta; int32 size_waste, waste = -1; int32 nbytes; OffsetNumber seed_1 = 0, seed_2 = 0; OffsetNumber *left, *right; OffsetNumber maxoff; int i, signlen = 0; SPLITCOST *costvector; maxoff = entryvec->n - 1; nbytes = (maxoff + 2) * sizeof(OffsetNumber); v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes); for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) { if (signlen == 0) { signlen = SIGLEN(GETENTRY(entryvec, k)); } for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) { size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k)); if (size_waste > waste) { waste = size_waste; seed_1 = k; seed_2 = j; } } } if (signlen == 0) { signlen = SIGLEN(GETENTRY(entryvec, maxoff)); } left = v->spl_left; v->spl_nleft = 0; right = v->spl_right; v->spl_nright = 0; if (signlen == 0 || waste == 0) { /* all entries a alltrue or all the same */ for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k)) { if (k <= (maxoff - FirstOffsetNumber + 1) / 2) { v->spl_left[v->spl_nleft] = k; v->spl_nleft++; } else { v->spl_right[v->spl_nright] = k; v->spl_nright++; } } signlen = VARSIZE(GETENTRY(entryvec, FirstOffsetNumber)); datum_l = palloc(signlen); memcpy(datum_l, GETENTRY(entryvec, FirstOffsetNumber), signlen); v->spl_ldatum = PointerGetDatum(datum_l); datum_r = palloc(signlen); memcpy(datum_r, GETENTRY(entryvec, FirstOffsetNumber), signlen); v->spl_rdatum = PointerGetDatum(datum_r); Assert( v->spl_nleft + v->spl_nright == maxoff ); PG_RETURN_POINTER(v); } if (seed_1 == 0 || seed_2 == 0) { seed_1 = 1; seed_2 = 2; } /* form initial .. */ if (ISALLTRUE(GETENTRY(entryvec, seed_1))) { datum_l = palloc(VARHDRSZ); SET_VARSIZE(datum_l, VARHDRSZ); } else { datum_l = palloc(signlen + VARHDRSZ); memcpy(datum_l , GETENTRY(entryvec, seed_1) , signlen + VARHDRSZ); } if (ISALLTRUE(GETENTRY(entryvec, seed_2))) { datum_r = palloc(VARHDRSZ); SET_VARSIZE(datum_r, VARHDRSZ); } else { datum_r = palloc(signlen + VARHDRSZ); memcpy(datum_r , GETENTRY(entryvec, seed_2) , signlen + VARHDRSZ); } /* sort before ... */ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) { costvector[j - 1].pos = j; size_alpha = hemdist(datum_l, GETENTRY(entryvec, j)); size_beta = hemdist(datum_r, GETENTRY(entryvec, j)); costvector[j - 1].cost = Abs(size_alpha - size_beta); } qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); for (k = 0; k < maxoff; k++) { j = costvector[k].pos; if (j == seed_1) { *left++ = j; v->spl_nleft++; continue; } else if (j == seed_2) { *right++ = j; v->spl_nright++; continue; } size_alpha = hemdist(GETENTRY(entryvec, j), datum_l); size_beta = hemdist(GETENTRY(entryvec, j), datum_r); if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) { if (!ISALLTRUE(datum_l)) { if (ISALLTRUE(GETENTRY(entryvec, j))) { datum_l = palloc(VARHDRSZ); SET_VARSIZE(datum_l, VARHDRSZ); } else { unsigned char *as = (unsigned char *)VARDATA(datum_l), *bs = (unsigned char *)VARDATA(GETENTRY(entryvec, j)); for (i=0;i<signlen;i++) { as[i] |= bs[i]; } } } *left++ = j; v->spl_nleft++; } else { if (!ISALLTRUE(datum_r)) { if (ISALLTRUE(GETENTRY(entryvec, j))) { datum_r = palloc(VARHDRSZ); SET_VARSIZE(datum_r, VARHDRSZ); } else { unsigned char *as = (unsigned char *)VARDATA(datum_r), *bs = (unsigned char *)VARDATA(GETENTRY(entryvec, j)); for (i=0;i<signlen;i++) { as[i] |= bs[i]; } } } *right++ = j; v->spl_nright++; } } *right = *left = FirstOffsetNumber; v->spl_ldatum = PointerGetDatum(datum_l); v->spl_rdatum = PointerGetDatum(datum_r); Assert( v->spl_nleft + v->spl_nright == maxoff ); PG_RETURN_POINTER(v); }