/*
** Enlarge b_union to contain b_new. If b_new contains more dimensions than
** b_union, expand b_union to contain those dimensions.
**
** b_union is passed by reference because growing it goes through repalloc,
** which may hand back a different pointer; the caller's pointer is updated.
**
** Dimensions that b_union gains in this call are seeded with [0.0, 0.0]
** before merging. That matches the "absent dimension is zero" rule used by
** gidx_equals, gidx_overlaps and gidx_contains, and it keeps the merge loop
** from reading the uninitialised tail of the freshly grown allocation.
*/
static void gidx_merge(GIDX **b_union, GIDX *b_new)
{
	int i, dims_union, dims_new;

	Assert(b_union);
	Assert(*b_union);
	Assert(b_new);

	dims_union = GIDX_NDIMS(*b_union);
	dims_new = GIDX_NDIMS(b_new);

	POSTGIS_DEBUGF(4, "merging gidx (%s) into gidx (%s)", gidx_to_string(b_new), gidx_to_string(*b_union));

	if ( dims_new > dims_union )
	{
		POSTGIS_DEBUGF(5, "reallocating b_union from %d dims to %d dims", dims_union, dims_new);
		*b_union = (GIDX*)repalloc(*b_union, GIDX_SIZE(dims_new));
		SET_VARSIZE(*b_union, VARSIZE(b_new));

		/* The added space is not initialised by repalloc: give the new dimensions a defined value. */
		for ( i = dims_union; i < dims_new; i++ )
		{
			GIDX_SET_MIN(*b_union, i, 0.0);
			GIDX_SET_MAX(*b_union, i, 0.0);
		}
		dims_union = dims_new;
	}

	for ( i = 0; i < dims_new; i++ )
	{
		/* Adjust minimums */
		GIDX_SET_MIN(*b_union, i, Min(GIDX_GET_MIN(*b_union,i),GIDX_GET_MIN(b_new,i)));
		/* Adjust maximums */
		GIDX_SET_MAX(*b_union, i, Max(GIDX_GET_MAX(*b_union,i),GIDX_GET_MAX(b_new,i)));
	}

	POSTGIS_DEBUGF(5, "merge complete (%s)", gidx_to_string(*b_union));
	return;
}
/*
** Equality GIDX test.
**
** Box(A) EQUALS Box(B) IFF (pt(A)LL == pt(B)LL) && (pt(A)UR == pt(B)UR)
**
** Two NULL boxes compare equal; a NULL box never equals a non-NULL one.
** When the boxes differ in dimensionality, the extra dimensions of the
** larger box must be exactly [0.0, 0.0] for the boxes to be equal.
*/
static bool gidx_equals(GIDX *a, GIDX *b)
{
	int dim;

	POSTGIS_DEBUG(5, "entered function");

	if ( a == NULL || b == NULL )
		return ( a == NULL && b == NULL ) ? TRUE : FALSE;

	/* After this call 'a' is the box with the most dimensions. */
	gidx_dimensionality_check(&a, &b);

	/* Shared dimensions: both corners must match exactly. */
	for ( dim = 0; dim < GIDX_NDIMS(b); dim++ )
	{
		if ( GIDX_GET_MIN(a,dim) != GIDX_GET_MIN(b,dim) ||
		     GIDX_GET_MAX(a,dim) != GIDX_GET_MAX(b,dim) )
			return FALSE;
	}

	/* Dimensions only 'a' has: they must collapse to zero. */
	for ( dim = GIDX_NDIMS(b); dim < GIDX_NDIMS(a); dim++ )
	{
		if ( GIDX_GET_MIN(a,dim) != 0.0 || GIDX_GET_MAX(a,dim) != 0.0 )
			return FALSE;
	}

	return TRUE;
}
/*
** Overlapping GIDX box test.
**
** Box(A) Overlap Box(B) IFF (pt(a)LL < pt(B)UR) && (pt(b)LL < pt(a)UR)
**
** Returns FALSE if either box is NULL. Touching boxes (equal bounds) count
** as overlapping. Dimensions present in only one box are compared against
** zero: that box must span 0.0 in each of them.
*/
static bool gidx_overlaps(GIDX *a, GIDX *b)
{
	int dim;
	int shared_dims;

	POSTGIS_DEBUG(5, "entered function");

	if ( a == NULL || b == NULL )
		return FALSE;

	/* After this call 'a' is the box with the most dimensions. */
	gidx_dimensionality_check(&a, &b);
	shared_dims = GIDX_NDIMS(b);

	/* Within the dimensions of (b), the ranges must intersect. */
	for ( dim = 0; dim < shared_dims; dim++ )
	{
		if ( GIDX_GET_MIN(a,dim) > GIDX_GET_MAX(b,dim) ||
		     GIDX_GET_MIN(b,dim) > GIDX_GET_MAX(a,dim) )
			return FALSE;
	}

	/* Dimensions of (a) absent in (b): the range of (a) must include zero. */
	for ( dim = shared_dims; dim < GIDX_NDIMS(a); dim++ )
	{
		if ( GIDX_GET_MIN(a,dim) > 0.0 || GIDX_GET_MAX(a,dim) < 0.0 )
			return FALSE;
	}

	return TRUE;
}
/*
** Ensure the first argument has the higher dimensionality.
**
** Swaps the two caller-held pointers when *a has fewer dimensions than *b.
** Only the pointers are exchanged; the boxes themselves are not modified.
*/
static void gidx_dimensionality_check(GIDX **a, GIDX **b)
{
	GIDX *smaller;

	/* Already in the desired order: nothing to do. */
	if ( ! ( GIDX_NDIMS(*a) < GIDX_NDIMS(*b) ) )
		return;

	smaller = *a;
	*a = *b;
	*b = smaller;
}
/*
** Calculate the volume of the intersection of the boxes.
**
** Only the dimensions shared by both boxes contribute to the product.
** Returns 0.0 as soon as the boxes turn out to be disjoint along any shared
** dimension. Raises an ERROR through elog if either argument is NULL.
*/
static float gidx_inter_volume(GIDX *a, GIDX *b)
{
	int dim;
	float result;

	POSTGIS_DEBUG(5,"entered function");

	if ( a == NULL || b == NULL )
	{
		elog(ERROR, "gidx_inter_volume received a null argument");
		return 0.0;
	}

	/* After this call 'a' is the box with the most dimensions. */
	gidx_dimensionality_check(&a, &b);

	/* Overlap length along the first dimension seeds the product. */
	{
		float upper = Min(GIDX_GET_MAX(a,0),GIDX_GET_MAX(b,0));
		float lower = Max(GIDX_GET_MIN(a,0),GIDX_GET_MIN(b,0));
		result = upper - lower;
	}

	/* A negative length means the boxes do not meet at all. */
	if ( result < 0.0 )
		return 0.0;

	/* Fold in the overlap length of every further shared dimension. */
	dim = 1;
	while ( dim < GIDX_NDIMS(b) )
	{
		float upper = Min(GIDX_GET_MAX(a,dim),GIDX_GET_MAX(b,dim));
		float lower = Max(GIDX_GET_MIN(a,dim),GIDX_GET_MIN(b,dim));
		float width = upper - lower;

		if ( width < 0.0 )
			return 0.0;

		result *= width;
		dim++;
	}

	POSTGIS_DEBUGF(5, "volume( %s intersection %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
	return result;
}
/*
** Calculate the volume of the union of the boxes. Avoids creating an
** intermediate box.
**
** If exactly one argument is NULL the volume of the other box is returned.
** If both are NULL an ERROR is raised through elog. Dimensions that only the
** higher-dimensional box has contribute that box's own extent.
*/
static float gidx_union_volume(GIDX *a, GIDX *b)
{
	float result;
	int dim;
	int ndims_a, ndims_b;

	POSTGIS_DEBUG(5,"entered function");

	if ( a == NULL || b == NULL )
	{
		if ( a != NULL )
			return gidx_volume(a);
		if ( b != NULL )
			return gidx_volume(b);

		elog(ERROR, "gidx_union_volume received two null arguments");
		return 0.0;
	}

	/* After this call 'a' is the box with the most dimensions. */
	gidx_dimensionality_check(&a, &b);
	ndims_a = GIDX_NDIMS(a);
	ndims_b = GIDX_NDIMS(b);

	/* Combined extent along the first dimension seeds the product. */
	{
		float upper = Max(GIDX_GET_MAX(a,0),GIDX_GET_MAX(b,0));
		float lower = Min(GIDX_GET_MIN(a,0),GIDX_GET_MIN(b,0));
		result = upper - lower;
	}

	/* Combined extent along each remaining shared dimension. */
	for ( dim = 1; dim < ndims_b; dim++ )
	{
		float upper = Max(GIDX_GET_MAX(a,dim),GIDX_GET_MAX(b,dim));
		float lower = Min(GIDX_GET_MIN(a,dim),GIDX_GET_MIN(b,dim));
		result *= (upper - lower);
	}

	/* Extent of the higher-dimensional box along its extra dimensions. */
	for ( dim = ndims_b; dim < ndims_a; dim++ )
	{
		result *= (GIDX_GET_MAX(a,dim) - GIDX_GET_MIN(a,dim));
	}

	POSTGIS_DEBUGF(5, "volume( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
	return result;
}
/*
** GIDX expansion, make d units bigger in all dimensions.
**
** Every minimum is lowered by d and every maximum raised by d, in place.
** A NULL box is silently ignored.
*/
void gidx_expand(GIDX *a, float d)
{
	int dim;

	POSTGIS_DEBUG(5, "entered function");

	if ( a != NULL )
	{
		for ( dim = 0; dim < GIDX_NDIMS(a); dim++ )
		{
			float lower = GIDX_GET_MIN(a, dim) - d;
			float upper = GIDX_GET_MAX(a, dim) + d;

			GIDX_SET_MIN(a, dim, lower);
			GIDX_SET_MAX(a, dim, upper);
		}
	}
}
/*
** Containment GIDX test.
**
** Box(A) CONTAINS Box(B) IFF (pt(A)LL < pt(B)LL) && (pt(A)UR > pt(B)UR)
**
** Returns FALSE if either box is NULL. Bounds that coincide still count as
** contained. Extra dimensions of (a) are ignored; extra dimensions of (b)
** must be exactly zero for (b) to be contained.
*/
static bool gidx_contains(GIDX *a, GIDX *b)
{
	int dim, dims_a, dims_b, dims_shared;

	POSTGIS_DEBUG(5, "entered function");

	if ( a == NULL || b == NULL )
		return FALSE;

	dims_a = GIDX_NDIMS(a);
	dims_b = GIDX_NDIMS(b);
	dims_shared = Min(dims_a, dims_b);

	/*
	** If (b) is of higher dimensionality than (a) it can only be contained
	** if those higher dimensions are zeroes. The loop body is never reached
	** when (a) has at least as many dimensions as (b).
	*/
	for ( dim = dims_a; dim < dims_b; dim++ )
	{
		if ( GIDX_GET_MIN(b,dim) != 0 || GIDX_GET_MAX(b,dim) != 0 )
			return FALSE;
	}

	/* In every shared dimension (a) has to enclose the range of (b). */
	for ( dim = 0; dim < dims_shared; dim++ )
	{
		if ( GIDX_GET_MIN(a,dim) > GIDX_GET_MIN(b,dim) ||
		     GIDX_GET_MAX(a,dim) < GIDX_GET_MAX(b,dim) )
			return FALSE;
	}

	return TRUE;
}
/*
** Calculate the volume (in n-d units) of the GIDX.
**
** The result is the product of (max - min) over every dimension of the box.
** A NULL box yields 0.0 rather than an error.
*/
static float gidx_volume(GIDX *a)
{
	float result;
	int dim;

	if ( a == NULL )
		return 0.0;

	/* The first dimension seeds the product, the others are multiplied in. */
	result = GIDX_GET_MAX(a,0) - GIDX_GET_MIN(a,0);

	dim = 1;
	while ( dim < GIDX_NDIMS(a) )
	{
		result *= (GIDX_GET_MAX(a,dim) - GIDX_GET_MIN(a,dim));
		dim++;
	}

	POSTGIS_DEBUGF(5, "calculated volume of %s as %.12g", gidx_to_string(a), result);
	return result;
}
/*
** Ensure all minimums are below maximums.
**
** Any dimension whose stored minimum is greater than its maximum has the two
** values exchanged in place. The box must not be NULL.
*/
static inline void gidx_validate(GIDX *b)
{
	int dim;

	Assert(b);
	POSTGIS_DEBUGF(5,"validating gidx (%s)", gidx_to_string(b));

	for ( dim = 0; dim < GIDX_NDIMS(b); dim++ )
	{
		float lower = GIDX_GET_MIN(b,dim);
		float upper = GIDX_GET_MAX(b,dim);

		/* Bounds already ordered: leave this dimension alone. */
		if ( ! ( lower > upper ) )
			continue;

		GIDX_SET_MIN(b,dim,upper);
		GIDX_SET_MAX(b,dim,lower);
	}
}
/*
** Make a copy of a GSERIALIZED, with a new bounding box value embedded.
**
** Returns a newly palloc'ed GSERIALIZED, or NULL when the dimensionality of
** the box does not match that of the geometry (a geodetic geometry is
** treated as having 3 dimensions). The input is never modified.
*/
GSERIALIZED* gidx_insert_into_gserialized(GSERIALIZED *g, GIDX *gidx)
{
	GSERIALIZED *g_out = NULL;
	int box_ndims = GIDX_NDIMS(gidx);
	int g_ndims;
	size_t box_size;

	if ( FLAGS_GET_GEODETIC(g->flags) )
		g_ndims = 3;
	else
		g_ndims = FLAGS_NDIMS(g->flags);

	/* One min and one max float per dimension. */
	box_size = 2 * g_ndims * sizeof(float);

	/* The dimensionality of the inputs has to match, otherwise give up. */
	if ( g_ndims != box_ndims )
		return NULL;

	if ( ! FLAGS_GET_BBOX(g->flags) )
	{
		/*
		** Serialized has no box. Allocate room for the old data plus the
		** box, and leave a gap right after the 8 header bytes into which
		** the new values are written below.
		*/
		size_t varsize_new = VARSIZE(g) + box_size;
		uchar *body;

		g_out = palloc(varsize_new);

		/* Head of g first ... */
		memcpy(g_out, g, 8);

		/* ... then the body of g, shifted past the space kept for the box. */
		body = g_out->data;
		body += box_size;
		memcpy(body, g->data, VARSIZE(g) - 8);

		SET_VARSIZE(g_out, varsize_new);
		FLAGS_SET_BBOX(g_out->flags, 1);
	}
	else
	{
		/* Serialized already has room for a box: a plain copy is enough. */
		g_out = palloc(VARSIZE(g));
		memcpy(g_out, g, VARSIZE(g));
	}

	/* Now write the gidx values into the memory segment. */
	memcpy(g_out->data, gidx->c, box_size);

	return g_out;
}
/*
** Render a GIDX as text, for debugging output.
**
** The result has the form "GIDX( min0 min1 ..., max0 max1 ... )" and is
** returned in a freshly palloc'ed buffer owned by the caller. A NULL box is
** rendered as "<NULLPTR>".
**
** The buffer is sized from the number of dimensions and every write is
** bounded, so large-magnitude values in a high-dimensional box cannot run
** past the end of the allocation (a fixed 128-byte buffer was too small for
** eight "%.12g" fields in exponent notation).
*/
static char* gidx_to_string(GIDX *a)
{
	/* " %.12g" needs at most about 20 bytes (sign, 12 digits, point, exponent); 32 leaves slack. */
	enum { GIDX_STR_FIXED = 16, GIDX_STR_PER_VALUE = 32 };
	char *str, *rv;
	int i, ndims;
	size_t size, used;

	if ( a == NULL )
		return pstrdup("<NULLPTR>");

	ndims = GIDX_NDIMS(a);

	/* Fixed part covers "GIDX(", ",", " )" and the terminator; two values per dimension. */
	size = GIDX_STR_FIXED + (size_t)ndims * 2 * GIDX_STR_PER_VALUE;
	str = (char*)palloc(size);
	rv = str;
	used = 0;

	used += snprintf(str + used, size - used, "GIDX(");
	for ( i = 0; i < ndims; i++ )
		used += snprintf(str + used, size - used, " %.12g", GIDX_GET_MIN(a,i));
	used += snprintf(str + used, size - used, ",");
	for ( i = 0; i < ndims; i++ )
		used += snprintf(str + used, size - used, " %.12g", GIDX_GET_MAX(a,i));
	used += snprintf(str + used, size - used, " )");

	return rv;
}
Datum geography_gist_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry_in = (GISTENTRY*)PG_GETARG_POINTER(0); GISTENTRY *entry_out = NULL; char gidxmem[GIDX_MAX_SIZE]; GIDX *bbox_out = (GIDX*)gidxmem; int result = G_SUCCESS; int i; POSTGIS_DEBUG(4, "[GIST] 'compress' function called"); /* ** Not a leaf key? There's nothing to do. ** Return the input unchanged. */ if ( ! entry_in->leafkey ) { POSTGIS_DEBUG(4, "[GIST] non-leafkey entry, returning input unaltered"); PG_RETURN_POINTER(entry_in); } POSTGIS_DEBUG(4, "[GIST] processing leafkey input"); entry_out = palloc(sizeof(GISTENTRY)); /* ** Null key? Make a copy of the input entry and ** return. */ if ( DatumGetPointer(entry_in->key) == NULL ) { POSTGIS_DEBUG(4, "[GIST] leafkey is null"); gistentryinit(*entry_out, (Datum) 0, entry_in->rel, entry_in->page, entry_in->offset, FALSE); POSTGIS_DEBUG(4, "[GIST] returning copy of input"); PG_RETURN_POINTER(entry_out); } /* Extract our index key from the GiST entry. */ result = geography_datum_gidx(entry_in->key, bbox_out); /* Is the bounding box valid (non-empty, non-infinite)? If not, return input uncompressed. */ if ( result == G_FAILURE ) { POSTGIS_DEBUG(4, "[GIST] empty geometry!"); PG_RETURN_POINTER(entry_in); } POSTGIS_DEBUGF(4, "[GIST] got entry_in->key: %s", gidx_to_string(bbox_out)); /* Check all the dimensions for finite values */ for ( i = 0; i < GIDX_NDIMS(bbox_out); i++ ) { if ( ! finite(GIDX_GET_MAX(bbox_out, i)) || ! finite(GIDX_GET_MIN(bbox_out, i)) ) { POSTGIS_DEBUG(4, "[GIST] infinite geometry!"); PG_RETURN_POINTER(entry_in); } } /* Enure bounding box has minimums below maximums. */ gidx_validate(bbox_out); /* Prepare GISTENTRY for return. */ gistentryinit(*entry_out, PointerGetDatum(gidx_copy(bbox_out)), entry_in->rel, entry_in->page, entry_in->offset, FALSE); /* Return GISTENTRY. */ POSTGIS_DEBUG(4, "[GIST] 'compress' function complete"); PG_RETURN_POINTER(entry_out); }
Datum geography_gist_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector*) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC*) PG_GETARG_POINTER(1); OffsetNumber i; /* One union box for each half of the space. */ GIDX **box_union; /* One offset number list for each half of the space. */ OffsetNumber **list; /* One position index for each half of the space. */ int *pos; GIDX *box_pageunion; GIDX *box_current; int direction = -1; bool all_entries_equal = true; OffsetNumber max_offset; int nbytes, ndims_pageunion, d; int posmax = -1; POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called"); /* ** First calculate the bounding box and maximum number of dimensions in this page. */ max_offset = entryvec->n - 1; box_current = (GIDX*) DatumGetPointer(entryvec->vector[FirstOffsetNumber].key); box_pageunion = gidx_copy(box_current); /* Calculate the containing box (box_pageunion) for the whole page we are going to split. */ for ( i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i) ) { box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key); if ( all_entries_equal == true && ! gidx_equals (box_pageunion, box_current) ) all_entries_equal = false; gidx_merge( &box_pageunion, box_current ); } POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion)); /* Every box in the page is the same! So, we split and just put half the boxes in each child. */ if ( all_entries_equal ) { POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!"); geography_gist_picksplit_fallback(entryvec, v); PG_RETURN_POINTER(v); } /* Initialize memory structures. 
*/ nbytes = (max_offset + 2) * sizeof(OffsetNumber); ndims_pageunion = GIDX_NDIMS(box_pageunion); POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion); pos = palloc(2*ndims_pageunion * sizeof(int)); list = palloc(2*ndims_pageunion * sizeof(OffsetNumber*)); box_union = palloc(2*ndims_pageunion * sizeof(GIDX*)); for ( d = 0; d < ndims_pageunion; d++ ) { list[BELOW(d)] = (OffsetNumber*) palloc(nbytes); list[ABOVE(d)] = (OffsetNumber*) palloc(nbytes); box_union[BELOW(d)] = gidx_new(ndims_pageunion); box_union[ABOVE(d)] = gidx_new(ndims_pageunion); pos[BELOW(d)] = 0; pos[ABOVE(d)] = 0; } /* ** Assign each entry in the node to the volume partitions it belongs to, ** such as "above the x/y plane, left of the y/z plane, below the x/z plane". ** Each entry thereby ends up in three of the six partitions. */ POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis"); for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) ) { box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key); for ( d = 0; d < ndims_pageunion; d++ ) { if ( GIDX_GET_MIN(box_current,d)-GIDX_GET_MIN(box_pageunion,d) < GIDX_GET_MAX(box_pageunion,d)-GIDX_GET_MAX(box_current,d) ) { geography_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i); } else { geography_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i); } } } /* ** "Bad disposition", too many entries fell into one octant of the space, so no matter which ** plane we choose to split on, we're going to end up with a mostly full node. Where the ** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the ** sides of the page union box can occasionally all end up in one place, leading ** to this condition. */ if ( geography_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE ) { /* ** Instead we split on center points and see if we do better. 
** First calculate the average center point for each axis. */ double *avgCenter = palloc(ndims_pageunion * sizeof(double)); for ( d = 0; d < ndims_pageunion; d++ ) { avgCenter[d] = 0.0; } POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method"); for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) ) { box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key); for ( d = 0; d < ndims_pageunion; d++ ) { avgCenter[d] += (GIDX_GET_MAX(box_current,d) + GIDX_GET_MIN(box_current,d)) / 2.0; } } for ( d = 0; d < ndims_pageunion; d++ ) { avgCenter[d] /= max_offset; pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */ POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]); } /* For each of our entries... */ for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) ) { double center; box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key); for ( d = 0; d < ndims_pageunion; d++ ) { center = (GIDX_GET_MIN(box_current,d)+GIDX_GET_MAX(box_current,d))/2.0; if ( center < avgCenter[d] ) geography_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i); else if ( FPeq(center, avgCenter[d]) ) if ( pos[BELOW(d)] > pos[ABOVE(d)] ) geography_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i); else geography_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i); else geography_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i); } } /* Do we have a good disposition now? If not, screw it, just cut the node in half. */ if ( geography_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE ) { POSTGIS_DEBUG(4, "[GIST] picksplit still cannot find a good split! 
just cutting the node in half"); geography_gist_picksplit_fallback(entryvec, v); PG_RETURN_POINTER(v); } } /* ** Now, what splitting plane gives us the most even ratio of ** entries in our child pages? Since each split region has been apportioned entries ** against the same number of total entries, the axis that has the smallest maximum ** number of entries in its regions is the most evenly distributed. ** TODO: what if the distributions are equal in two or more axes? */ for ( d = 0; d < ndims_pageunion; d++ ) { int posd = Max(pos[ABOVE(d)],pos[BELOW(d)]); if ( posd > posmax ) { direction = d; posmax = posd; } } if ( direction == -1 || posmax == -1 ) { /* ERROR OUT HERE */ elog(ERROR, "Error in building split, unable to determine split direction."); } POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction); geography_gist_picksplit_constructsplit(v, list[BELOW(direction)], pos[BELOW(direction)], &(box_union[BELOW(direction)]), list[ABOVE(direction)], pos[ABOVE(direction)], &(box_union[ABOVE(direction)]) ); POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX*)v->spl_ldatum)); POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX*)v->spl_rdatum)); POSTGIS_DEBUGF(4, "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)", direction, GIDX_GET_MIN(box_pageunion, direction), GIDX_GET_MAX(box_pageunion, direction), GIDX_GET_MIN((GIDX*)v->spl_ldatum, direction), GIDX_GET_MAX((GIDX*)v->spl_ldatum, direction), GIDX_GET_MIN((GIDX*)v->spl_rdatum, direction), GIDX_GET_MAX((GIDX*)v->spl_rdatum, direction) ); PG_RETURN_POINTER(v); }