Exemplo n.º 1
0
/*
 * Fallback split: distribute the entries by position only.
 *
 * Entries whose offset is no greater than half the entry count are assigned
 * to the left page, all remaining entries to the right page.  The bounding
 * box of each side is built up as entries are assigned, then merged with the
 * side's pre-existing datum (when flagged as existing) and stored in *v.
 * Both "datum exists" flags are cleared before returning.
 */
static void
fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
{
	OffsetNumber lastoff = entryvec->n - 1;
	OffsetNumber off;
	int			listbytes = (lastoff + 2) * sizeof(OffsetNumber);
	BOX		   *leftbox = NULL;
	BOX		   *rightbox = NULL;

	v->spl_left = (OffsetNumber *) palloc(listbytes);
	v->spl_right = (OffsetNumber *) palloc(listbytes);
	v->spl_nleft = 0;
	v->spl_nright = 0;

	for (off = FirstOffsetNumber; off <= lastoff; off = OffsetNumberNext(off))
	{
		BOX		   *key = DatumGetBoxP(entryvec->vector[off].key);
		BOX		  **side;

		/* pick the destination page and record the offset there */
		if (off <= (lastoff - FirstOffsetNumber + 1) / 2)
		{
			side = &leftbox;
			v->spl_left[v->spl_nleft++] = off;
		}
		else
		{
			side = &rightbox;
			v->spl_right[v->spl_nright++] = off;
		}

		/* the first entry placed on a side seeds that side's bounding box */
		if (*side != NULL)
			adjustBox(*side, key);
		else
		{
			*side = (BOX *) palloc(sizeof(BOX));
			**side = *key;
		}
	}

	/* fold in whatever union the caller already had for each side */
	if (v->spl_ldatum_exists)
		adjustBox(leftbox, DatumGetBoxP(v->spl_ldatum));
	if (v->spl_rdatum_exists)
		adjustBox(rightbox, DatumGetBoxP(v->spl_rdatum));

	v->spl_ldatum = BoxPGetDatum(leftbox);
	v->spl_rdatum = BoxPGetDatum(rightbox);

	v->spl_ldatum_exists = false;
	v->spl_rdatum_exists = false;
}
Exemplo n.º 2
0
/*
 * Fallback split: distribute the entries by position only.
 *
 * Entries whose id is no greater than half the entry count are assigned to
 * the left page, all remaining entries to the right page.  The bounding box
 * of each side is built up as entries are assigned, then merged with the
 * side's pre-existing datum (when flagged as existing) and stored in *v.
 * Both "datum exists" flags are cleared before returning.
 */
static void
fallbackSplit(struct gist_entry_vector *entryvec, struct gist_splitvec *v)
{
	item_id_t last_id = entryvec->n - 1;
	item_id_t id;
	int list_bytes = (last_id + 2) * sizeof(item_id_t);
	BOX *left_box = NULL;
	BOX *right_box = NULL;

	v->spl_left = (item_id_t *) palloc(list_bytes);
	v->spl_right = (item_id_t *) palloc(list_bytes);
	v->spl_nleft = 0;
	v->spl_nright = 0;

	for (id = FIRST_ITEM_ID; id <= last_id; id = ITEM_ID_NEXT(id)) {
		BOX *key = D_TO_BOX_P(entryvec->vector[id].key);
		BOX **side;

		/* pick the destination page and record the id there */
		if (id <= (last_id - FIRST_ITEM_ID + 1) / 2) {
			side = &left_box;
			v->spl_left[v->spl_nleft++] = id;
		} else {
			side = &right_box;
			v->spl_right[v->spl_nright++] = id;
		}

		/* the first entry placed on a side seeds that side's bounding box */
		if (*side != NULL) {
			adjustBox(*side, key);
		} else {
			*side = (BOX *) palloc(sizeof(BOX));
			**side = *key;
		}
	}

	/* fold in whatever union the caller already had for each side */
	if (v->spl_ldatum_exists)
		adjustBox(left_box, D_TO_BOX_P(v->spl_ldatum));
	if (v->spl_rdatum_exists)
		adjustBox(right_box, D_TO_BOX_P(v->spl_rdatum));

	v->spl_ldatum = BOX_P_TO_D(left_box);
	v->spl_rdatum = BOX_P_TO_D(right_box);

	v->spl_ldatum_exists = false;
	v->spl_rdatum_exists = false;
}
Exemplo n.º 3
0
/*
 * GiST Union support function for boxes.
 *
 * Argument 0: the entry vector whose keys are boxes.
 * Argument 1: pointer to an int that receives the size of the result.
 *
 * Returns a newly allocated box that starts as a copy of the first entry's
 * key and is then enlarged by adjustBox() with every remaining entry.
 */
datum_t gist_box_union(PG_FUNC_ARGS)
{
	struct gist_entry_vector *entries = (struct gist_entry_vector *) ARG_POINTER(0);
	int *result_size = (int *) ARG_POINTER(1);
	int count = entries->n;
	int idx;
	BOX *merged = (BOX *) palloc(sizeof(BOX));

	/* seed the result with the first key, then widen it entry by entry */
	*merged = *D_TO_BOX_P(entries->vector[0].key);
	for (idx = 1; idx < count; idx++)
		adjustBox(merged, D_TO_BOX_P(entries->vector[idx].key));

	*result_size = sizeof(BOX);
	RET_POINTER(merged);
}
Exemplo n.º 4
0
/*
 * GiST Union support function for boxes.
 *
 * Argument 0: the entry vector whose keys are boxes.
 * Argument 1: pointer to an int that receives the size of the result.
 *
 * Returns a newly allocated box that starts as a copy of the first entry's
 * key and is then enlarged by adjustBox() with every remaining entry.
 */
Datum
gist_box_union(PG_FUNCTION_ARGS)
{
	GistEntryVector *entries = (GistEntryVector *) PG_GETARG_POINTER(0);
	int		   *resultSize = (int *) PG_GETARG_POINTER(1);
	int			count = entries->n;
	int			idx;
	BOX		   *merged = (BOX *) palloc(sizeof(BOX));

	/* seed the result with the first key, then widen it entry by entry */
	*merged = *DatumGetBoxP(entries->vector[0].key);
	for (idx = 1; idx < count; idx++)
		adjustBox(merged, DatumGetBoxP(entries->vector[idx].key));

	*resultSize = sizeof(BOX);

	PG_RETURN_POINTER(merged);
}
Exemplo n.º 5
0
/*
 * The GiST PickSplit method
 *
 * New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree',
 * C.H.Ang and T.C.Tan
 *
 * This is used for both boxes and points.
 *
 * Argument 0 is the GistEntryVector holding the entries to split; argument 1
 * is the GIST_SPLITVEC to fill in, which is also the pointer returned.
 *
 * Outline:
 *	1. Compute the bounding box of all entries and detect the case where
 *	   every entry is identical; that case goes straight to fallbackSplit().
 *	2. Build two candidate splits at once: left/right (by x) and
 *	   bottom/top (by y), each with its offset list and union box.
 *	3. If both candidates are badly unbalanced, rebuild them by comparing
 *	   box centers with the average center; if still bad, fallbackSplit().
 *	4. Pick the candidate whose larger group is smaller; on a tie, compare
 *	   size_box() of the intersection of the two group unions.
 *	5. Let chooseLR() store the chosen candidate into the split vector.
 */
Datum
gist_box_picksplit(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber i;
	OffsetNumber *listL,
			   *listR,
			   *listB,
			   *listT;
	BOX		   *unionL,
			   *unionR,
			   *unionB,
			   *unionT;
	int			posL,
				posR,
				posB,
				posT;
	BOX			pageunion;
	BOX		   *cur;
	char		direction = ' ';
	bool		allisequal = true;
	OffsetNumber maxoff;
	int			nbytes;

	posL = posR = posB = posT = 0;
	maxoff = entryvec->n - 1;

	/* seed the page-wide bounding box with the first entry */
	cur = DatumGetBoxP(entryvec->vector[FirstOffsetNumber].key);
	memcpy((void *) &pageunion, (void *) cur, sizeof(BOX));

	/* find MBR */
	for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i))
	{
		cur = DatumGetBoxP(entryvec->vector[i].key);

		/*
		 * While allisequal holds, pageunion still equals the first entry, so
		 * this compares each entry against the first one.
		 */
		if (allisequal && (
						   pageunion.high.x != cur->high.x ||
						   pageunion.high.y != cur->high.y ||
						   pageunion.low.x != cur->low.x ||
						   pageunion.low.y != cur->low.y
						   ))
			allisequal = false;

		adjustBox(&pageunion, cur);
	}

	if (allisequal)
	{
		/*
		 * All entries are the same
		 */
		fallbackSplit(entryvec, v);
		PG_RETURN_POINTER(v);
	}

	/* one offset list and one union box per candidate group */
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	listL = (OffsetNumber *) palloc(nbytes);
	listR = (OffsetNumber *) palloc(nbytes);
	listB = (OffsetNumber *) palloc(nbytes);
	listT = (OffsetNumber *) palloc(nbytes);
	unionL = (BOX *) palloc(sizeof(BOX));
	unionR = (BOX *) palloc(sizeof(BOX));
	unionB = (BOX *) palloc(sizeof(BOX));
	unionT = (BOX *) palloc(sizeof(BOX));

/*
 * ADDLIST appends offset 'num' to 'list' at index 'pos' and increments 'pos'.
 * It also extends 'unionD' to cover the box pointed to by the enclosing
 * function's 'cur' variable (not a macro argument); when 'pos' is still zero
 * the union is initialized as a copy of 'cur' instead.
 */
#define ADDLIST( list, unionD, pos, num ) do { \
	if ( pos ) { \
		if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x	= cur->high.x; \
		if ( (unionD)->low.x  > cur->low.x	) (unionD)->low.x	= cur->low.x; \
		if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y	= cur->high.y; \
		if ( (unionD)->low.y  > cur->low.y	) (unionD)->low.y	= cur->low.y; \
	} else { \
			memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) );	\
	} \
	(list)[pos] = num; \
	(pos)++; \
} while(0)

	/*
	 * Every entry lands in exactly one of L/R and exactly one of B/T: it goes
	 * to the low-side group when its low edge is nearer to the page's low
	 * bound than its high edge is to the page's high bound.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		cur = DatumGetBoxP(entryvec->vector[i].key);
		if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x)
			ADDLIST(listL, unionL, posL, i);
		else
			ADDLIST(listR, unionR, posR, i);
		if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y)
			ADDLIST(listB, unionB, posB, i);
		else
			ADDLIST(listT, unionT, posT, i);
	}

/*
 * A pair of group sizes is "bad" when either size is zero or when one size
 * divided by the other is below LIMIT_RATIO.
 */
#define LIMIT_RATIO 0.1
#define _IS_BADRATIO(x,y)	( (y) == 0 || (float)(x)/(float)(y) < LIMIT_RATIO )
#define IS_BADRATIO(x,y) ( _IS_BADRATIO((x),(y)) || _IS_BADRATIO((y),(x)) )
	/* bad disposition, try to split by centers of boxes  */
	if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB))
	{
		double		avgCenterX = 0.0,
					avgCenterY = 0.0;
		double		CenterX,
					CenterY;

		/* first pass: average of the box centers over all entries */
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			cur = DatumGetBoxP(entryvec->vector[i].key);
			avgCenterX += ((double) cur->high.x + (double) cur->low.x) / 2.0;
			avgCenterY += ((double) cur->high.y + (double) cur->low.y) / 2.0;
		}

		avgCenterX /= maxoff;
		avgCenterY /= maxoff;

		/*
		 * Second pass: rebuild all four lists from scratch.  Resetting the
		 * counters to zero makes ADDLIST re-initialize each union box.
		 * An entry whose center equals the average goes to whichever group
		 * of the pair is currently not the larger one.
		 */
		posL = posR = posB = posT = 0;
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			cur = DatumGetBoxP(entryvec->vector[i].key);

			CenterX = ((double) cur->high.x + (double) cur->low.x) / 2.0;
			CenterY = ((double) cur->high.y + (double) cur->low.y) / 2.0;

			if (CenterX < avgCenterX)
				ADDLIST(listL, unionL, posL, i);
			else if (CenterX == avgCenterX)
			{
				if (posL > posR)
					ADDLIST(listR, unionR, posR, i);
				else
					ADDLIST(listL, unionL, posL, i);
			}
			else
				ADDLIST(listR, unionR, posR, i);

			if (CenterY < avgCenterY)
				ADDLIST(listB, unionB, posB, i);
			else if (CenterY == avgCenterY)
			{
				if (posB > posT)
					ADDLIST(listT, unionT, posT, i);
				else
					ADDLIST(listB, unionB, posB, i);
			}
			else
				ADDLIST(listT, unionT, posT, i);
		}

		/* still unbalanced on both axes: give up and split trivially */
		if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB))
		{
			fallbackSplit(entryvec, v);
			PG_RETURN_POINTER(v);
		}
	}

	/* which split more optimal? */
	/* prefer the axis whose larger group is smaller */
	if (Max(posL, posR) < Max(posB, posT))
		direction = 'x';
	else if (Max(posL, posR) > Max(posB, posT))
		direction = 'y';
	else
	{
		/* tie: compare size_box() of the intersections of the group unions */
		Datum		interLR = DirectFunctionCall2(rt_box_inter,
												  BoxPGetDatum(unionL),
												  BoxPGetDatum(unionR));
		Datum		interBT = DirectFunctionCall2(rt_box_inter,
												  BoxPGetDatum(unionB),
												  BoxPGetDatum(unionT));
		double		sizeLR,
					sizeBT;

		sizeLR = size_box(interLR);
		sizeBT = size_box(interBT);

		if (sizeLR < sizeBT)
			direction = 'x';
		else
			direction = 'y';
	}

	/* hand the winning pair of groups to chooseLR() to fill in *v */
	if (direction == 'x')
		chooseLR(v,
				 listL, posL, unionL,
				 listR, posR, unionR);
	else
		chooseLR(v,
				 listB, posB, unionB,
				 listT, posT, unionT);

	PG_RETURN_POINTER(v);
}
Exemplo n.º 6
0
/*
 * Store two candidate groups into the split vector, deciding which one
 * becomes the left page and which the right page.
 *
 * Group 1 goes left by default.  The groups are swapped when:
 *	- both side datums already exist and size_box() of the intersection of
 *	  the resulting unions is larger without the swap than with it; or
 *	- only one side datum exists and gist_box_penalty() says the other group
 *	  is the cheaper one to merge into that existing side.
 *
 * Each stored union is first merged with the existing datum of its side (if
 * any); both "datum exists" flags are cleared before returning.
 */
static void
chooseLR(GIST_SPLITVEC *v,
		 OffsetNumber *list1, int nlist1, BOX *union1,
		 OffsetNumber *list2, int nlist2, BOX *union2)
{
	bool		swapGroups = false;
	OffsetNumber *leftList;
	OffsetNumber *rightList;
	int			leftCount;
	int			rightCount;
	BOX		   *leftUnion;
	BOX		   *rightUnion;

	if (v->spl_ldatum_exists && v->spl_rdatum_exists)
	{
		/* try both assignments and keep the one with the smaller overlap */
		BOX			keepLeft = *union1;
		BOX			keepRight = *union2;
		BOX			swapLeft = *union2;
		BOX			swapRight = *union1;
		double		overlapKeep;
		double		overlapSwap;

		adjustBox(&keepLeft, DatumGetBoxP(v->spl_ldatum));
		adjustBox(&keepRight, DatumGetBoxP(v->spl_rdatum));
		adjustBox(&swapLeft, DatumGetBoxP(v->spl_ldatum));
		adjustBox(&swapRight, DatumGetBoxP(v->spl_rdatum));

		overlapKeep = size_box(DirectFunctionCall2(rt_box_inter,
												   BoxPGetDatum(&keepLeft),
												   BoxPGetDatum(&keepRight)));
		overlapSwap = size_box(DirectFunctionCall2(rt_box_inter,
												   BoxPGetDatum(&swapLeft),
												   BoxPGetDatum(&swapRight)));

		swapGroups = (overlapKeep > overlapSwap);
	}
	else if (v->spl_ldatum_exists || v->spl_rdatum_exists)
	{
		/* exactly one side exists: compare the penalty of adding each group */
		float		penalty1;
		float		penalty2;
		GISTENTRY	existing;
		GISTENTRY	candidate;

		gistentryinit(existing, (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
					  NULL, NULL, InvalidOffsetNumber, FALSE);

		gistentryinit(candidate, BoxPGetDatum(union1), NULL, NULL, InvalidOffsetNumber, FALSE);
		DirectFunctionCall3(gist_box_penalty,
							PointerGetDatum(&existing),
							PointerGetDatum(&candidate),
							PointerGetDatum(&penalty1));
		gistentryinit(candidate, BoxPGetDatum(union2), NULL, NULL, InvalidOffsetNumber, FALSE);
		DirectFunctionCall3(gist_box_penalty,
							PointerGetDatum(&existing),
							PointerGetDatum(&candidate),
							PointerGetDatum(&penalty2));

		if (v->spl_ldatum_exists)
			swapGroups = (penalty1 > penalty2);
		else
			swapGroups = (penalty1 < penalty2);
	}

	if (swapGroups)
	{
		leftList = list2;
		leftCount = nlist2;
		leftUnion = union2;
		rightList = list1;
		rightCount = nlist1;
		rightUnion = union1;
	}
	else
	{
		leftList = list1;
		leftCount = nlist1;
		leftUnion = union1;
		rightList = list2;
		rightCount = nlist2;
		rightUnion = union2;
	}

	v->spl_left = leftList;
	v->spl_right = rightList;
	v->spl_nleft = leftCount;
	v->spl_nright = rightCount;

	/* merge each side with its pre-existing union before publishing it */
	if (v->spl_ldatum_exists)
		adjustBox(leftUnion, DatumGetBoxP(v->spl_ldatum));
	v->spl_ldatum = BoxPGetDatum(leftUnion);

	if (v->spl_rdatum_exists)
		adjustBox(rightUnion, DatumGetBoxP(v->spl_rdatum));
	v->spl_rdatum = BoxPGetDatum(rightUnion);

	v->spl_ldatum_exists = false;
	v->spl_rdatum_exists = false;
}
Exemplo n.º 7
0
/*
 * The GiST PickSplit method
 *
 * New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree',
 * C.H.Ang and T.C.Tan
 *
 * This is used for both boxes and points.
 *
 * Argument 0 is the entry vector holding the entries to split; argument 1 is
 * the split vector to fill in, which is also the pointer returned.
 *
 * Outline:
 *	1. Compute the bounding box of all entries and detect the case where
 *	   every entry is identical; that case goes straight to fallbackSplit().
 *	2. Build two candidate splits at once: left/right (by x) and
 *	   bottom/top (by y), each with its id list and union box.
 *	3. If both candidates are badly unbalanced, rebuild them by comparing
 *	   box centers with the average center; if still bad, fallbackSplit().
 *	4. Pick the candidate whose larger group is smaller; on a tie, compare
 *	   size_box() of the intersection of the two group unions.
 *	5. Let chooseLR() store the chosen candidate into the split vector.
 */
datum_t gist_box_picksplit(PG_FUNC_ARGS)
{
	struct gist_entry_vector *entryvec = (struct gist_entry_vector *)ARG_POINTER(0);
	struct gist_splitvec *v = (struct gist_splitvec *)ARG_POINTER(1);
	item_id_t i;
	item_id_t *listL;
	item_id_t *listR;
	item_id_t *listB;
	item_id_t *listT;
	BOX *unionL;
	BOX *unionR;
	BOX *unionB;
	BOX *unionT;
	int posL, posR, posB, posT;
	BOX pageunion;
	BOX *cur;
	char direction = ' ';
	bool allisequal = true;
	item_id_t maxoff;
	int nbytes;

	posL = posR = posB = posT = 0;
	maxoff = entryvec->n - 1;

	/* seed the page-wide bounding box with the first entry */
	cur = D_TO_BOX_P(entryvec->vector[FIRST_ITEM_ID].key);
	memcpy((void*) &pageunion, (void*) cur, sizeof(BOX));

	/* find MBR */
	for (i = ITEM_ID_NEXT(FIRST_ITEM_ID); i <= maxoff; i = ITEM_ID_NEXT(i)) {
		cur = D_TO_BOX_P(entryvec->vector[i].key);
		/*
		 * While allisequal holds, pageunion still equals the first entry, so
		 * this compares each entry against the first one.
		 */
		if (allisequal
			&& (pageunion.high.x != cur->high.x
				|| pageunion.high.y != cur->high.y
				|| pageunion.low.x != cur->low.x
				|| pageunion.low.y != cur->low.y))
			allisequal = false;

		adjustBox(&pageunion, cur);
	}

	if (allisequal) {
		/*
		 * All entries are the same
		 */
		fallbackSplit(entryvec, v);
		RET_POINTER(v);
	}

	/* one id list and one union box per candidate group */
	nbytes = (maxoff + 2) * sizeof(item_id_t);
	listL = (item_id_t *) palloc(nbytes);
	listR = (item_id_t *) palloc(nbytes);
	listB = (item_id_t *) palloc(nbytes);
	listT = (item_id_t *) palloc(nbytes);
	unionL = (BOX *) palloc(sizeof(BOX));
	unionR = (BOX *) palloc(sizeof(BOX));
	unionB = (BOX *) palloc(sizeof(BOX));
	unionT = (BOX *) palloc(sizeof(BOX));

/*
 * ADDLIST appends id 'num' to 'list' at index 'pos' and increments 'pos'.
 * It also extends 'unionD' to cover the box pointed to by the enclosing
 * function's 'cur' variable (not a macro argument); when 'pos' is still zero
 * the union is initialized as a copy of 'cur' instead.
 */
#define ADDLIST( list, unionD, pos, num ) do { \
	if ( pos ) { \
		if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x	= cur->high.x; \
		if ( (unionD)->low.x  > cur->low.x	) (unionD)->low.x	= cur->low.x; \
		if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y	= cur->high.y; \
		if ( (unionD)->low.y  > cur->low.y	) (unionD)->low.y	= cur->low.y; \
	} else { \
			memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) );	\
	} \
		\
	(list)[pos] = num; \
	(pos)++; \
} while(0)

	/*
	 * Every entry lands in exactly one of L/R and exactly one of B/T: it goes
	 * to the low-side group when its low edge is nearer to the page's low
	 * bound than its high edge is to the page's high bound.
	 */
	for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) {
		cur = D_TO_BOX_P(entryvec->vector[i].key);
		if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x)
			ADDLIST(listL, unionL, posL, i);
		else
			ADDLIST(listR, unionR, posR, i);

		if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y)
			ADDLIST(listB, unionB, posB, i);
		else
			ADDLIST(listT, unionT, posT, i);
	}

/*
 * A pair of group sizes is "bad" when either size is zero or when one size
 * divided by the other is below LIMIT_RATIO.
 */
#define LIMIT_RATIO 0.1
#define _IS_BADRATIO(x,y)	( (y) == 0 || (float)(x)/(float)(y) < LIMIT_RATIO )
#define IS_BADRATIO(x,y) 	( _IS_BADRATIO((x),(y)) || _IS_BADRATIO((y),(x)) )

	/* bad disposition, try to split by centers of boxes  */
	if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) {
		double avgCenterX = 0.0;
		double avgCenterY = 0.0;
		double CenterX;
		double CenterY;

		/* first pass: average of the box centers over all entries */
		for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) {
			cur = D_TO_BOX_P(entryvec->vector[i].key);
			avgCenterX += ((double)cur->high.x + (double)cur->low.x) / 2.0;
			avgCenterY += ((double)cur->high.y + (double)cur->low.y) / 2.0;
		}

		avgCenterX /= maxoff;
		avgCenterY /= maxoff;

		/*
		 * Second pass: rebuild all four lists from scratch.  Resetting the
		 * counters to zero makes ADDLIST re-initialize each union box.
		 * An entry whose center equals the average goes to whichever group
		 * of the pair is currently not the larger one.
		 */
		posL = posR = posB = posT = 0;
		for (i = FIRST_ITEM_ID; i <= maxoff; i = ITEM_ID_NEXT(i)) {
			cur = D_TO_BOX_P(entryvec->vector[i].key);

			CenterX = ((double)cur->high.x + (double)cur->low.x) / 2.0;
			CenterY = ((double)cur->high.y + (double)cur->low.y) / 2.0;

			if (CenterX < avgCenterX)
				ADDLIST(listL, unionL, posL, i);
			else if (CenterX == avgCenterX) {
				if (posL > posR)
					ADDLIST(listR, unionR, posR, i);
				else
					ADDLIST(listL, unionL, posL, i);
			} else
				ADDLIST(listR, unionR, posR, i);

			if (CenterY < avgCenterY)
				ADDLIST(listB, unionB, posB, i);
			else if (CenterY == avgCenterY) {
				if (posB > posT)
					ADDLIST(listT, unionT, posT, i);
				else
					ADDLIST(listB, unionB, posB, i);
			} else
				ADDLIST(listT, unionT, posT, i);
		}

		/* still unbalanced on both axes: give up and split trivially */
		if (IS_BADRATIO(posR, posL) && IS_BADRATIO(posT, posB)) {
			fallbackSplit(entryvec, v);
			RET_POINTER(v);
		}
	}

	/* which split more optimal? */
	/* prefer the axis whose larger group is smaller */
	if (Max(posL, posR) < Max(posB, posT))
		direction = 'x';
	else if (Max(posL, posR) > Max(posB, posT))
		direction = 'y';
	else {
		/* tie: compare size_box() of the intersections of the group unions */
		datum_t interLR = DIRECT_FC2(rt_box_inter, BOX_P_TO_D(unionL), BOX_P_TO_D(unionR));
		datum_t interBT = DIRECT_FC2(rt_box_inter, BOX_P_TO_D(unionB), BOX_P_TO_D(unionT));
		double sizeLR;
		double sizeBT;

		sizeLR = size_box(interLR);
		sizeBT = size_box(interBT);
		if (sizeLR < sizeBT)
			direction = 'x';
		else
			direction = 'y';
	}

	/* hand the winning pair of groups to chooseLR() to fill in *v */
	if (direction == 'x')
		chooseLR(v, listL, posL, unionL, listR, posR, unionR);
	else
		chooseLR(v, listB, posB, unionB, listT, posT, unionT);

	RET_POINTER(v);
}
Exemplo n.º 8
0
/*
 * Store two candidate groups into the split vector, deciding which one
 * becomes the left page and which the right page.
 *
 * Group 1 goes left by default.  The groups are swapped when:
 *	- both side datums already exist and size_box() of the intersection of
 *	  the resulting unions is larger without the swap than with it; or
 *	- only one side datum exists and gist_box_penalty() says the other group
 *	  is the cheaper one to merge into that existing side.
 *
 * Each stored union is first merged with the existing datum of its side (if
 * any); both "datum exists" flags are cleared before returning.
 */
static void
chooseLR(
	struct gist_splitvec *v,
	item_id_t* list1,
	int nlist1,
	BOX* union1,
	item_id_t* list2,
	int nlist2,
	BOX* union2)
{
	bool swap_groups = false;
	item_id_t *left_list;
	item_id_t *right_list;
	int left_count;
	int right_count;
	BOX *left_union;
	BOX *right_union;

	if (v->spl_ldatum_exists && v->spl_rdatum_exists) {
		/* try both assignments and keep the one with the smaller overlap */
		BOX keep_left = *union1;
		BOX keep_right = *union2;
		BOX swap_left = *union2;
		BOX swap_right = *union1;
		double overlap_keep;
		double overlap_swap;

		adjustBox(&keep_left, D_TO_BOX_P(v->spl_ldatum));
		adjustBox(&keep_right, D_TO_BOX_P(v->spl_rdatum));
		adjustBox(&swap_left, D_TO_BOX_P(v->spl_ldatum));
		adjustBox(&swap_right, D_TO_BOX_P(v->spl_rdatum));

		overlap_keep = size_box(DIRECT_FC2(rt_box_inter,
			BOX_P_TO_D(&keep_left), BOX_P_TO_D(&keep_right)));
		overlap_swap = size_box(DIRECT_FC2(rt_box_inter,
			BOX_P_TO_D(&swap_left), BOX_P_TO_D(&swap_right)));

		swap_groups = (overlap_keep > overlap_swap);
	} else if (v->spl_ldatum_exists || v->spl_rdatum_exists) {
		/* exactly one side exists: compare the penalty of adding each group */
		float penalty1;
		float penalty2;
		struct gist_entry existing;
		struct gist_entry candidate;

		gistentryinit(existing,
			(v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
			NULL, NULL, INVALID_ITEM_ID, FALSE);

		gistentryinit(candidate, BOX_P_TO_D(union1), NULL, NULL, INVALID_ITEM_ID, FALSE);
		DIRECT_FC3(gist_box_penalty,
			PTR_TO_D(&existing), PTR_TO_D(&candidate), PTR_TO_D(&penalty1));
		gistentryinit(candidate, BOX_P_TO_D(union2), NULL, NULL, INVALID_ITEM_ID, FALSE);
		DIRECT_FC3(gist_box_penalty,
			PTR_TO_D(&existing), PTR_TO_D(&candidate), PTR_TO_D(&penalty2));

		if (v->spl_ldatum_exists)
			swap_groups = (penalty1 > penalty2);
		else
			swap_groups = (penalty1 < penalty2);
	}

	if (swap_groups) {
		left_list = list2;
		left_count = nlist2;
		left_union = union2;
		right_list = list1;
		right_count = nlist1;
		right_union = union1;
	} else {
		left_list = list1;
		left_count = nlist1;
		left_union = union1;
		right_list = list2;
		right_count = nlist2;
		right_union = union2;
	}

	v->spl_left = left_list;
	v->spl_right = right_list;
	v->spl_nleft = left_count;
	v->spl_nright = right_count;

	/* merge each side with its pre-existing union before publishing it */
	if (v->spl_ldatum_exists)
		adjustBox(left_union, D_TO_BOX_P(v->spl_ldatum));
	v->spl_ldatum = BOX_P_TO_D(left_union);

	if (v->spl_rdatum_exists)
		adjustBox(right_union, D_TO_BOX_P(v->spl_rdatum));
	v->spl_rdatum = BOX_P_TO_D(right_union);

	v->spl_ldatum_exists = false;
	v->spl_rdatum_exists = false;
}
Exemplo n.º 9
0
/*
 * The GiST PickSplit method
 *
 * New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree',
 * C.H.Ang and T.C.Tan
 *
 * Argument 0 is the GistEntryVector holding the entries to split; argument 1
 * is the GIST_SPLITVEC to fill in, which is also the pointer returned.
 *
 * Outline:
 *	1. Compute the bounding box of all entries and note whether every entry
 *	   is identical; identical entries are split in half by position.
 *	2. Build two candidate splits at once: left/right (by x) and
 *	   bottom/top (by y), each with its offset list and union box.
 *	3. If on both axes one group came out empty, sort the entries with
 *	   compare_KB and redistribute, balancing entries that sit exactly in
 *	   the middle.
 *	4. Pick the candidate whose larger group is smaller; on a tie, compare
 *	   size_box() of the intersection of the two group unions.
 *	5. Let chooseLR() store the chosen candidate into the split vector.
 */
Datum
gist_box_picksplit(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber i;
	OffsetNumber *listL,
			   *listR,
			   *listB,
			   *listT;
	BOX		   *unionL,
			   *unionR,
			   *unionB,
			   *unionT;
	int			posL,
				posR,
				posB,
				posT;
	BOX			pageunion;
	BOX		   *cur;
	char		direction = ' ';
	bool		allisequal = true;
	OffsetNumber maxoff;
	int			nbytes;

	posL = posR = posB = posT = 0;
	maxoff = entryvec->n - 1;

	/* seed the page-wide bounding box with the first entry */
	cur = DatumGetBoxP(entryvec->vector[FirstOffsetNumber].key);
	memcpy((void *) &pageunion, (void *) cur, sizeof(BOX));

	/* find MBR */
	for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i))
	{
		cur = DatumGetBoxP(entryvec->vector[i].key);

		/*
		 * While allisequal holds, pageunion still equals the first entry, so
		 * this compares each entry against the first one.
		 */
		if (allisequal == true && (
								   pageunion.high.x != cur->high.x ||
								   pageunion.high.y != cur->high.y ||
								   pageunion.low.x != cur->low.x ||
								   pageunion.low.y != cur->low.y
								   ))
			allisequal = false;

		adjustBox(&pageunion, cur);
	}

	/* the L/R lists and unions are needed by both the trivial and normal paths */
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	listL = (OffsetNumber *) palloc(nbytes);
	listR = (OffsetNumber *) palloc(nbytes);
	unionL = (BOX *) palloc(sizeof(BOX));
	unionR = (BOX *) palloc(sizeof(BOX));
	if (allisequal)
	{
		/*
		 * Byte-wise recheck of the second entry against the page union; if
		 * the bytes differ, control falls through to the regular algorithm.
		 */
		cur = DatumGetBoxP(entryvec->vector[OffsetNumberNext(FirstOffsetNumber)].key);
		if (memcmp((void *) cur, (void *) &pageunion, sizeof(BOX)) == 0)
		{
			/* trivial split: both sides get the page union as their box */
			v->spl_left = listL;
			v->spl_right = listR;
			v->spl_nleft = v->spl_nright = 0;
			memcpy((void *) unionL, (void *) &pageunion, sizeof(BOX));
			memcpy((void *) unionR, (void *) &pageunion, sizeof(BOX));

			/* offsets up to half the entry count go left, the rest right */
			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
			{
				if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
				{
					v->spl_left[v->spl_nleft] = i;
					v->spl_nleft++;
				}
				else
				{
					v->spl_right[v->spl_nright] = i;
					v->spl_nright++;
				}
			}

			/* merge with any pre-existing side unions, then clear the flags */
			if (v->spl_ldatum_exists)
				adjustBox(unionL, DatumGetBoxP(v->spl_ldatum));
			v->spl_ldatum = BoxPGetDatum(unionL);

			if (v->spl_rdatum_exists)
				adjustBox(unionR, DatumGetBoxP(v->spl_rdatum));
			v->spl_rdatum = BoxPGetDatum(unionR);

			v->spl_ldatum_exists = v->spl_rdatum_exists = false;

			PG_RETURN_POINTER(v);
		}
	}

	/* the B/T lists and unions are only needed on the regular path */
	listB = (OffsetNumber *) palloc(nbytes);
	listT = (OffsetNumber *) palloc(nbytes);
	unionB = (BOX *) palloc(sizeof(BOX));
	unionT = (BOX *) palloc(sizeof(BOX));

/*
 * ADDLIST appends offset 'num' to 'list' at index 'pos' and increments 'pos'.
 * It also extends 'unionD' to cover the box pointed to by the enclosing
 * function's 'cur' variable (not a macro argument); when 'pos' is still zero
 * the union is initialized as a copy of 'cur' instead.
 */
#define ADDLIST( list, unionD, pos, num ) do { \
	if ( pos ) { \
		if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x	= cur->high.x; \
		if ( (unionD)->low.x  > cur->low.x	) (unionD)->low.x	= cur->low.x; \
		if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y	= cur->high.y; \
		if ( (unionD)->low.y  > cur->low.y	) (unionD)->low.y	= cur->low.y; \
	} else { \
			memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) );	\
	} \
	(list)[pos] = num; \
	(pos)++; \
} while(0)

	/*
	 * Every entry lands in exactly one of L/R and exactly one of B/T: it goes
	 * to the low-side group when its low edge is nearer to the page's low
	 * bound than its high edge is to the page's high bound.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		cur = DatumGetBoxP(entryvec->vector[i].key);
		if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x)
			ADDLIST(listL, unionL, posL, i);
		else
			ADDLIST(listR, unionR, posR, i);
		if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y)
			ADDLIST(listB, unionB, posB, i);
		else
			ADDLIST(listT, unionT, posT, i);
	}

	/* bad disposition, sort by ascending and resplit */
	/* "bad" here means: on both axes, one of the two groups is empty */
	if ((posR == 0 || posL == 0) && (posT == 0 || posB == 0))
	{
		KBsort	   *arr = (KBsort *) palloc(sizeof(KBsort) * maxoff);

		/* arr is 0-based while offsets start at FirstOffsetNumber, hence i - 1 */
		posL = posR = posB = posT = 0;
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			arr[i - 1].key = DatumGetBoxP(entryvec->vector[i].key);
			arr[i - 1].pos = i;
		}
		qsort(arr, maxoff, sizeof(KBsort), compare_KB);

		/*
		 * Redistribute in sorted order.  An entry whose two distances are
		 * exactly equal goes to whichever group of the pair is currently
		 * not the larger one.
		 */
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			cur = arr[i - 1].key;
			if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x)
				ADDLIST(listL, unionL, posL, arr[i - 1].pos);
			else if (cur->low.x - pageunion.low.x == pageunion.high.x - cur->high.x)
			{
				if (posL > posR)
					ADDLIST(listR, unionR, posR, arr[i - 1].pos);
				else
					ADDLIST(listL, unionL, posL, arr[i - 1].pos);
			}
			else
				ADDLIST(listR, unionR, posR, arr[i - 1].pos);

			if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y)
				ADDLIST(listB, unionB, posB, arr[i - 1].pos);
			else if (cur->low.y - pageunion.low.y == pageunion.high.y - cur->high.y)
			{
				if (posB > posT)
					ADDLIST(listT, unionT, posT, arr[i - 1].pos);
				else
					ADDLIST(listB, unionB, posB, arr[i - 1].pos);
			}
			else
				ADDLIST(listT, unionT, posT, arr[i - 1].pos);
		}
	}

	/* which split more optimal? */
	/* prefer the axis whose larger group is smaller */
	if (Max(posL, posR) < Max(posB, posT))
		direction = 'x';
	else if (Max(posL, posR) > Max(posB, posT))
		direction = 'y';
	else
	{
		/* tie: compare size_box() of the intersections of the group unions */
		Datum		interLR = DirectFunctionCall2(rt_box_inter,
												  BoxPGetDatum(unionL),
												  BoxPGetDatum(unionR));
		Datum		interBT = DirectFunctionCall2(rt_box_inter,
												  BoxPGetDatum(unionB),
												  BoxPGetDatum(unionT));
		double		sizeLR,
					sizeBT;

		sizeLR = size_box(interLR);
		sizeBT = size_box(interBT);

		if (sizeLR < sizeBT)
			direction = 'x';
		else
			direction = 'y';
	}

	/* hand the winning pair of groups to chooseLR() to fill in *v */
	if (direction == 'x')
		chooseLR(v,
				 listL, posL, unionL,
				 listR, posR, unionR);
	else
		chooseLR(v,
				 listB, posB, unionB,
				 listT, posT, unionT);

	PG_RETURN_POINTER(v);
}
Exemplo n.º 10
0
/*
 * --------------------------------------------------------------------------
 * Double sorting split algorithm. This is used for both boxes and points.
 *
 * The algorithm finds split of boxes by considering splits along each axis.
 * Each entry is first projected as an interval on the X-axis, and different
 * ways to split the intervals into two groups are considered, trying to
 * minimize the overlap of the groups. Then the same is repeated for the
 * Y-axis, and the overall best split is chosen. The quality of a split is
 * determined by overlap along that axis and some other criteria (see
 * g_box_consider_split).
 *
 * After that, all the entries are divided into three groups:
 *
 * 1) Entries which should be placed to the left group
 * 2) Entries which should be placed to the right group
 * 3) "Common entries" which can be placed to any of groups without affecting
 *	  the overlap along the selected axis.
 *
 * The common entries are distributed by minimizing penalty.
 *
 * Argument 0 is the GistEntryVector holding the entries to split; argument 1
 * is the GIST_SPLITVEC to fill in, which is also the pointer returned.
 *
 * For details see:
 * "A new double sorting-based node splitting algorithm for R-tree", A. Korotkov
 * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
 * --------------------------------------------------------------------------
 */
Datum
gist_box_picksplit(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber i,
				maxoff;
	ConsiderSplitContext context;
	BOX		   *box,
			   *leftBox,
			   *rightBox;
	int			dim,
				commonEntriesCount;
	SplitInterval *intervalsLower,
			   *intervalsUpper;
	CommonEntry *commonEntries;
	int			nentries;

	memset(&context, 0, sizeof(ConsiderSplitContext));

	maxoff = entryvec->n - 1;
	nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;

	/* Allocate arrays for intervals along axes */
	intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
	intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));

	/*
	 * Calculate the overall minimum bounding box over all the entries.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		box = DatumGetBoxP(entryvec->vector[i].key);
		if (i == FirstOffsetNumber)
			context.boundingBox = *box;
		else
			adjustBox(&context.boundingBox, box);
	}

	/*
	 * Iterate over axes for optimal split searching.
	 */
	context.first = true;		/* nothing selected yet */
	for (dim = 0; dim < 2; dim++)
	{
		double		leftUpper,
					rightLower;
		int			i1,
					i2;

		/*
		 * Project each entry as an interval on the selected axis
		 * (dim 0 uses the x coordinates, dim 1 the y coordinates).
		 */
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			box = DatumGetBoxP(entryvec->vector[i].key);
			if (dim == 0)
			{
				intervalsLower[i - FirstOffsetNumber].lower = box->low.x;
				intervalsLower[i - FirstOffsetNumber].upper = box->high.x;
			}
			else
			{
				intervalsLower[i - FirstOffsetNumber].lower = box->low.y;
				intervalsLower[i - FirstOffsetNumber].upper = box->high.y;
			}
		}

		/*
		 * Make two arrays of intervals: one sorted by lower bound and another
		 * sorted by upper bound.
		 */
		memcpy(intervalsUpper, intervalsLower,
			   sizeof(SplitInterval) * nentries);
		qsort(intervalsLower, nentries, sizeof(SplitInterval),
			  interval_cmp_lower);
		qsort(intervalsUpper, nentries, sizeof(SplitInterval),
			  interval_cmp_upper);

		/*----
		 * The goal is to form a left and right interval, so that every entry
		 * interval is contained by either left or right interval (or both).
		 *
		 * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
		 *
		 * 0 1 2 3 4
		 * +-+
		 *	 +---+
		 *	   +-+
		 *	   +---+
		 *
		 * The left and right intervals are of the form (0,a) and (b,4).
		 * We first consider splits where b is the lower bound of an entry.
		 * We iterate through all entries, and for each b, calculate the
		 * smallest possible a. Then we consider splits where a is the
		 * upper bound of an entry, and for each a, calculate the greatest
		 * possible b.
		 *
		 * In the above example, the first loop would consider splits:
		 * b=0: (0,1)-(0,4)
		 * b=1: (0,1)-(1,4)
		 * b=2: (0,3)-(2,4)
		 *
		 * And the second loop:
		 * a=1: (0,1)-(1,4)
		 * a=3: (0,3)-(2,4)
		 * a=4: (0,4)-(2,4)
		 */

		/*
		 * Iterate over lower bound of right group, finding smallest possible
		 * upper bound of left group.
		 */
		i1 = 0;
		i2 = 0;
		rightLower = intervalsLower[i1].lower;
		leftUpper = intervalsUpper[i2].lower;
		while (true)
		{
			/*
			 * Find next lower bound of right group.  Every interval skipped
			 * here starts at the current rightLower, so leftUpper is raised
			 * to cover its upper bound.
			 */
			while (i1 < nentries && rightLower == intervalsLower[i1].lower)
			{
				leftUpper = Max(leftUpper, intervalsLower[i1].upper);
				i1++;
			}
			if (i1 >= nentries)
				break;
			rightLower = intervalsLower[i1].lower;

			/*
			 * Find count of intervals which anyway should be placed to the
			 * left group.
			 */
			while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
				i2++;

			/*
			 * Consider found split.
			 */
			g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
		}

		/*
		 * Iterate over upper bound of left group finding greatest possible
		 * lower bound of right group.
		 */
		i1 = nentries - 1;
		i2 = nentries - 1;
		rightLower = intervalsLower[i1].upper;
		leftUpper = intervalsUpper[i2].upper;
		while (true)
		{
			/*
			 * Find next upper bound of left group.  Every interval skipped
			 * here ends at the current leftUpper, so rightLower is lowered
			 * to cover its lower bound.
			 */
			while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper)
			{
				rightLower = Min(rightLower, intervalsUpper[i2].lower);
				i2--;
			}
			if (i2 < 0)
				break;
			leftUpper = intervalsUpper[i2].upper;

			/*
			 * Find count of intervals which anyway should be placed to the
			 * right group.
			 */
			while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
				i1--;

			/*
			 * Consider found split.  i1 and i2 are indexes of the last
			 * element not consumed, so the counts passed are index + 1.
			 */
			g_box_consider_split(&context, dim,
								 rightLower, i1 + 1, leftUpper, i2 + 1);
		}
	}

	/*
	 * If we failed to find any acceptable splits, use trivial split.
	 * (context.first is still true when no candidate was selected.)
	 */
	if (context.first)
	{
		fallbackSplit(entryvec, v);
		PG_RETURN_POINTER(v);
	}

	/*
	 * Ok, we have now selected the split across one axis.
	 *
	 * While considering the splits, we already determined that there will be
	 * enough entries in both groups to reach the desired ratio, but we did
	 * not memorize which entries go to which group. So determine that now.
	 */

	/* Allocate vectors for results */
	v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
	v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
	v->spl_nleft = 0;
	v->spl_nright = 0;

	/* Allocate bounding boxes of left and right groups */
	leftBox = static_cast<BOX *>(palloc0(sizeof(BOX)));
	rightBox = static_cast<BOX *>(palloc0(sizeof(BOX)));

	/*
	 * Allocate an array for "common entries" - entries which can be placed to
	 * either group without affecting overlap along selected axis.
	 */
	commonEntriesCount = 0;
	commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));

	/*
	 * Helper macros to place an entry in the left or right group.  The first
	 * entry placed in a group initializes that group's bounding box; later
	 * entries extend it with adjustBox().  Both macros use v, leftBox and
	 * rightBox from the enclosing function.
	 */
#define PLACE_LEFT(box, off)					\
	do {										\
		if (v->spl_nleft > 0)					\
			adjustBox(leftBox, box);			\
		else									\
			*leftBox = *(box);					\
		v->spl_left[v->spl_nleft++] = off;		\
	} while(0)

#define PLACE_RIGHT(box, off)					\
	do {										\
		if (v->spl_nright > 0)					\
			adjustBox(rightBox, box);			\
		else									\
			*rightBox = *(box);					\
		v->spl_right[v->spl_nright++] = off;	\
	} while(0)

	/*
	 * Distribute entries which can be distributed unambiguously, and collect
	 * common entries.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		double		lower,
					upper;

		/*
		 * Get upper and lower bounds along selected axis.
		 */
		box = DatumGetBoxP(entryvec->vector[i].key);
		if (context.dim == 0)
		{
			lower = box->low.x;
			upper = box->high.x;
		}
		else
		{
			lower = box->low.y;
			upper = box->high.y;
		}

		if (upper <= context.leftUpper)
		{
			/* Fits to the left group */
			if (lower >= context.rightLower)
			{
				/* Fits also to the right group, so "common entry" */
				commonEntries[commonEntriesCount++].index = i;
			}
			else
			{
				/* Doesn't fit to the right group, so join to the left group */
				PLACE_LEFT(box, i);
			}
		}
		else
		{
			/*
			 * Each entry should fit on either left or right group. Since this
			 * entry didn't fit on the left group, it better fit in the right
			 * group.
			 */
			Assert(lower >= context.rightLower);

			/* Doesn't fit to the left group, so join to the right group */
			PLACE_RIGHT(box, i);
		}
	}

	/*
	 * Distribute "common entries", if any.
	 */
	if (commonEntriesCount > 0)
	{
		/*
		 * Calculate minimum number of entries that must be placed in both
		 * groups, to reach LIMIT_RATIO.
		 */
		int			m = ceil(LIMIT_RATIO * (double) nentries);

		/*
		 * Calculate delta between penalties of join "common entries" to
		 * different groups.  Note that 'i' is reused here as a plain
		 * 0-based index into commonEntries, not as an entry offset.
		 */
		for (i = 0; i < commonEntriesCount; i++)
		{
			box = DatumGetBoxP(entryvec->vector[commonEntries[i].index].key);
			commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
										 box_penalty(rightBox, box));
		}

		/*
		 * Sort "common entries" by calculated deltas in order to distribute
		 * the most ambiguous entries first.
		 */
		qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);

		/*
		 * Distribute "common entries" between groups.
		 */
		for (i = 0; i < commonEntriesCount; i++)
		{
			box = DatumGetBoxP(entryvec->vector[commonEntries[i].index].key);

			/*
			 * Check if we have to place this entry in either group to achieve
			 * LIMIT_RATIO.  (commonEntriesCount - i) is the number of common
			 * entries not yet placed, including this one.
			 */
			if (v->spl_nleft + (commonEntriesCount - i) <= m)
				PLACE_LEFT(box, commonEntries[i].index);
			else if (v->spl_nright + (commonEntriesCount - i) <= m)
				PLACE_RIGHT(box, commonEntries[i].index);
			else
			{
				/* Otherwise select the group by minimal penalty */
				if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
					PLACE_LEFT(box, commonEntries[i].index);
				else
					PLACE_RIGHT(box, commonEntries[i].index);
			}
		}
	}

	/* publish the bounding boxes of the two groups as the side datums */
	v->spl_ldatum = PointerGetDatum(leftBox);
	v->spl_rdatum = PointerGetDatum(rightBox);
	PG_RETURN_POINTER(v);
}