예제 #1
0
Datum
gtsquery_picksplit(PG_FUNCTION_ARGS)
{
    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
    GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
    OffsetNumber maxoff = entryvec->n - 2;
    OffsetNumber k,
                 j;

    TSQuerySign *datum_l,
                *datum_r;
    int4		size_alpha,
                size_beta;
    int4		size_waste,
                waste = -1;
    int4		nbytes;
    OffsetNumber seed_1 = 0,
                 seed_2 = 0;
    OffsetNumber *left,
                 *right;

    SPLITCOST  *costvector;

    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
    left = v->spl_left = (OffsetNumber *) palloc(nbytes);
    right = v->spl_right = (OffsetNumber *) palloc(nbytes);
    v->spl_nleft = v->spl_nright = 0;

    for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
        for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
        {
            size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k));
            if (size_waste > waste)
            {
                waste = size_waste;
                seed_1 = k;
                seed_2 = j;
            }
        }


    if (seed_1 == 0 || seed_2 == 0)
    {
        seed_1 = 1;
        seed_2 = 2;
    }

    datum_l = (TSQuerySign *) palloc(sizeof(TSQuerySign));
    *datum_l = *GETENTRY(entryvec, seed_1);
    datum_r = (TSQuerySign *) palloc(sizeof(TSQuerySign));
    *datum_r = *GETENTRY(entryvec, seed_2);


    maxoff = OffsetNumberNext(maxoff);
    costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
    for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
    {
        costvector[j - 1].pos = j;
        size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j));
        size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j));
        costvector[j - 1].cost = abs(size_alpha - size_beta);
    }
    qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

    for (k = 0; k < maxoff; k++)
    {
        j = costvector[k].pos;
        if (j == seed_1)
        {
            *left++ = j;
            v->spl_nleft++;
            continue;
        }
        else if (j == seed_2)
        {
            *right++ = j;
            v->spl_nright++;
            continue;
        }
        size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j));
        size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j));

        if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
        {
            *datum_l |= *GETENTRY(entryvec, j);
            *left++ = j;
            v->spl_nleft++;
        }
        else
        {
            *datum_r |= *GETENTRY(entryvec, j);
            *right++ = j;
            v->spl_nright++;
        }
    }

    *right = *left = FirstOffsetNumber;
    v->spl_ldatum = PointerGetDatum(datum_l);
    v->spl_rdatum = PointerGetDatum(datum_r);

    PG_RETURN_POINTER(v);
}
/*
** The GiST PickSplit method for _intments
** We use Guttman's poly time split algorithm
*/
Datum
g_int_picksplit(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber i,
				j;
	ArrayType  *datum_alpha,
			   *datum_beta;
	ArrayType  *datum_l,
			   *datum_r;
	ArrayType  *union_d,
			   *union_dl,
			   *union_dr;
	ArrayType  *inter_d;
	bool		firsttime;
	float		size_alpha,
				size_beta,
				size_union,
				size_inter;
	float		size_waste,
				waste;
	float		size_l,
				size_r;
	int			nbytes;
	OffsetNumber seed_1 = 0,
				seed_2 = 0;
	OffsetNumber *left,
			   *right;
	OffsetNumber maxoff;
	SPLITCOST  *costvector;

#ifdef GIST_DEBUG
	elog(DEBUG3, "--------picksplit %d", entryvec->n);
#endif

	maxoff = entryvec->n - 2;
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

	firsttime = true;
	waste = 0.0;
	for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
	{
		datum_alpha = GETENTRY(entryvec, i);
		for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
		{
			datum_beta = GETENTRY(entryvec, j);

			/* compute the wasted space by unioning these guys */
			/* size_waste = size_union - size_inter; */
			union_d = inner_int_union(datum_alpha, datum_beta);
			rt__int_size(union_d, &size_union);
			inter_d = inner_int_inter(datum_alpha, datum_beta);
			rt__int_size(inter_d, &size_inter);
			size_waste = size_union - size_inter;

			pfree(union_d);

			if (inter_d != (ArrayType *) NULL)
				pfree(inter_d);

			/*
			 * are these a more promising split that what we've already seen?
			 */

			if (size_waste > waste || firsttime)
			{
				waste = size_waste;
				seed_1 = i;
				seed_2 = j;
				firsttime = false;
			}
		}
	}

	left = v->spl_left;
	v->spl_nleft = 0;
	right = v->spl_right;
	v->spl_nright = 0;
	if (seed_1 == 0 || seed_2 == 0)
	{
		seed_1 = 1;
		seed_2 = 2;
	}

	datum_alpha = GETENTRY(entryvec, seed_1);
	datum_l = copy_intArrayType(datum_alpha);
	rt__int_size(datum_l, &size_l);
	datum_beta = GETENTRY(entryvec, seed_2);
	datum_r = copy_intArrayType(datum_beta);
	rt__int_size(datum_r, &size_r);

	maxoff = OffsetNumberNext(maxoff);

	/*
	 * sort entries
	 */
	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		costvector[i - 1].pos = i;
		datum_alpha = GETENTRY(entryvec, i);
		union_d = inner_int_union(datum_l, datum_alpha);
		rt__int_size(union_d, &size_alpha);
		pfree(union_d);
		union_d = inner_int_union(datum_r, datum_alpha);
		rt__int_size(union_d, &size_beta);
		pfree(union_d);
		costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
	}
	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

	/*
	 * Now split up the regions between the two seeds.	An important property
	 * of this split algorithm is that the split vector v has the indices of
	 * items to be split in order in its left and right vectors.  We exploit
	 * this property by doing a merge in the code that actually splits the
	 * page.
	 *
	 * For efficiency, we also place the new index tuple in this loop. This is
	 * handled at the very end, when we have placed all the existing tuples
	 * and i == maxoff + 1.
	 */


	for (j = 0; j < maxoff; j++)
	{
		i = costvector[j].pos;

		/*
		 * If we've already decided where to place this item, just put it on
		 * the right list.	Otherwise, we need to figure out which page needs
		 * the least enlargement in order to store the item.
		 */

		if (i == seed_1)
		{
			*left++ = i;
			v->spl_nleft++;
			continue;
		}
		else if (i == seed_2)
		{
			*right++ = i;
			v->spl_nright++;
			continue;
		}

		/* okay, which page needs least enlargement? */
		datum_alpha = GETENTRY(entryvec, i);
		union_dl = inner_int_union(datum_l, datum_alpha);
		union_dr = inner_int_union(datum_r, datum_alpha);
		rt__int_size(union_dl, &size_alpha);
		rt__int_size(union_dr, &size_beta);

		/* pick which page to add it to */
		if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
		{
			if (datum_l)
				pfree(datum_l);
			if (union_dr)
				pfree(union_dr);
			datum_l = union_dl;
			size_l = size_alpha;
			*left++ = i;
			v->spl_nleft++;
		}
		else
		{
			if (datum_r)
				pfree(datum_r);
			if (union_dl)
				pfree(union_dl);
			datum_r = union_dr;
			size_r = size_beta;
			*right++ = i;
			v->spl_nright++;
		}
	}
	pfree(costvector);
	*right = *left = FirstOffsetNumber;

	v->spl_ldatum = PointerGetDatum(datum_l);
	v->spl_rdatum = PointerGetDatum(datum_r);

	PG_RETURN_POINTER(v);
}
예제 #3
0
Datum
gmol_picksplit(PG_FUNCTION_ARGS)
{
  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
  OffsetNumber k, j;
  bytea *datum_l, *datum_r;
  int32 size_alpha, size_beta;
  int32 size_waste, waste = -1;
  int32 nbytes;
  OffsetNumber seed_1 = 0, seed_2 = 0;
  OffsetNumber *left, *right;
  OffsetNumber maxoff;
  int i, signlen = 0;
  SPLITCOST *costvector;

  maxoff = entryvec->n - 1;
  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
  v->spl_left = (OffsetNumber *) palloc(nbytes);
  v->spl_right = (OffsetNumber *) palloc(nbytes);

  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
    if (signlen == 0) {
        signlen = SIGLEN(GETENTRY(entryvec, k));
    }
    for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
      size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
      if (size_waste > waste) {
	waste = size_waste;
	seed_1 = k;
	seed_2 = j;
      }
    }
  }

  if (signlen == 0) {
    signlen = SIGLEN(GETENTRY(entryvec, maxoff));
  }

  left = v->spl_left;
  v->spl_nleft = 0;
  right = v->spl_right;
  v->spl_nright = 0;

  if (signlen == 0 || waste == 0) {
    /* all entries a alltrue  or all the same */
    
    for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k)) {
      if (k <= (maxoff - FirstOffsetNumber + 1) / 2) {
	v->spl_left[v->spl_nleft] = k;
	v->spl_nleft++;
      }
      else {
	v->spl_right[v->spl_nright] = k;
	v->spl_nright++;
      }
    }

    signlen = VARSIZE(GETENTRY(entryvec, FirstOffsetNumber));
                
    datum_l = palloc(signlen);
    memcpy(datum_l, GETENTRY(entryvec, FirstOffsetNumber), signlen);
    v->spl_ldatum = PointerGetDatum(datum_l);
    datum_r = palloc(signlen);
    memcpy(datum_r, GETENTRY(entryvec, FirstOffsetNumber), signlen);
    v->spl_rdatum = PointerGetDatum(datum_r);
    
    Assert( v->spl_nleft + v->spl_nright == maxoff );
    PG_RETURN_POINTER(v);
  }

  if (seed_1 == 0 || seed_2 == 0) {
    seed_1 = 1;
    seed_2 = 2;
  }

  /* form initial .. */
  if (ISALLTRUE(GETENTRY(entryvec, seed_1))) {
    datum_l = palloc(VARHDRSZ);
    SET_VARSIZE(datum_l, VARHDRSZ);
  }
  else {
    datum_l = palloc(signlen + VARHDRSZ);
    memcpy(datum_l , GETENTRY(entryvec, seed_1) , signlen + VARHDRSZ);
  }
  
  if (ISALLTRUE(GETENTRY(entryvec, seed_2))) {
    datum_r = palloc(VARHDRSZ);
    SET_VARSIZE(datum_r, VARHDRSZ);
  }
  else {
    datum_r = palloc(signlen + VARHDRSZ);
    memcpy(datum_r , GETENTRY(entryvec, seed_2) , signlen + VARHDRSZ);
  }

  /* sort before ... */
  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
    costvector[j - 1].pos = j;
    size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
    size_beta  = hemdist(datum_r, GETENTRY(entryvec, j));
    costvector[j - 1].cost = Abs(size_alpha - size_beta);
  }
  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

  for (k = 0; k < maxoff; k++) {
    j = costvector[k].pos;
    if (j == seed_1) {
      *left++ = j;
      v->spl_nleft++;
      continue;
    }
    else if (j == seed_2) {
      *right++ = j;
      v->spl_nright++;
      continue;
    }
    
    size_alpha = hemdist(GETENTRY(entryvec, j), datum_l);
    size_beta =  hemdist(GETENTRY(entryvec, j), datum_r);
    
    if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
      if (!ISALLTRUE(datum_l)) {
	if (ISALLTRUE(GETENTRY(entryvec, j))) {
	  datum_l = palloc(VARHDRSZ);
	  SET_VARSIZE(datum_l, VARHDRSZ);
	}
	else {
	  unsigned char
	    *as = (unsigned char *)VARDATA(datum_l),
	    *bs = (unsigned char *)VARDATA(GETENTRY(entryvec, j));
	  
	  for (i=0;i<signlen;i++) {
	    as[i] |= bs[i];
	  }
	}
      }
      *left++ = j;
      v->spl_nleft++;
    }
    else {
      if (!ISALLTRUE(datum_r)) {
	if (ISALLTRUE(GETENTRY(entryvec, j))) {
	  datum_r = palloc(VARHDRSZ);
	  SET_VARSIZE(datum_r, VARHDRSZ);
	}
	else {
	  unsigned char
	    *as = (unsigned char *)VARDATA(datum_r),
	    *bs = (unsigned char *)VARDATA(GETENTRY(entryvec, j));

	  for (i=0;i<signlen;i++) {
	    as[i] |= bs[i];
	  }
	}
      }
      *right++ = j;
      v->spl_nright++;
    }
  }
  
  *right = *left = FirstOffsetNumber;
  v->spl_ldatum = PointerGetDatum(datum_l);
  v->spl_rdatum = PointerGetDatum(datum_r);
  
  Assert( v->spl_nleft + v->spl_nright == maxoff );

  PG_RETURN_POINTER(v);
}