/*
 * extractEntriesS
 *		Run the extractValue support function of index column "attnum" on
 *		"value" and hand back the resulting key array in sorted order.
 *
 * *nentries is passed to the support function (as a pointer Datum) and is
 * forced to 0 when that function returns NULL.  *needUnique is reset to FALSE
 * before sorting; its address travels to the comparator inside the
 * cmpEntriesData argument, so presumably cmpEntries raises it when it meets
 * equal keys -- confirm against cmpEntries.
 *
 * Arrays holding zero or one key are returned without being sorted.
 */
Datum *
extractEntriesS(GinState *ginstate, OffsetNumber attnum, Datum value, int32 *nentries,
				bool *needUnique)
{
	cmpEntriesData sortarg;
	Datum		raw;
	Datum	   *keys;

	/* attnum is 1-based, the per-column support function arrays are 0-based */
	raw = FunctionCall2(&ginstate->extractValueFn[attnum - 1],
						value,
						PointerGetDatum(nentries));
	keys = (Datum *) DatumGetPointer(raw);

	if (keys == NULL)
		*nentries = 0;

	*needUnique = FALSE;

	/* nothing to order unless there are at least two keys */
	if (*nentries <= 1)
		return keys;

	sortarg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
	sortarg.needUnique = needUnique;
	qsort_arg(keys, *nentries, sizeof(Datum),
			  (qsort_arg_comparator) cmpEntries, (void *) &sortarg);

	return keys;
}
Exemplo n.º 2
0
/*
 * isort
 *		Sort the int32 array a[0 .. len-1] in place (callers pass len >= 2).
 *
 * Returns true if any duplicates were found.  The flag starts out false and
 * its address is given to isort_cmp as the qsort_arg pass-through argument;
 * presumably the comparator sets it on seeing equal values -- confirm against
 * isort_cmp.
 */
bool
isort(int32 *a, int len)
{
	bool		found_dup = false;

	qsort_arg(a, len, sizeof(*a), isort_cmp, (void *) &found_dup);

	return found_dup;
}
Exemplo n.º 3
0
/*
 * gbt_num_picksplit
 *		Split the entries of entryvec into a left and a right group.
 *
 * The entries at offsets FirstOffsetNumber .. entryvec->n - 1 are sorted with
 * tinfo->f_cmp (flinfo is its pass-through argument).  The first half of the
 * sorted sequence goes to v->spl_left, the remainder to v->spl_right, and each
 * key is folded into the matching union datum with gbt_num_bin_union().
 *
 * Returns v, with spl_left/spl_right freshly palloc'd.
 */
GIST_SPLITVEC *
gbt_num_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,
				  const gbtree_ninfo *tinfo, FmgrInfo *flinfo)
{
	OffsetNumber maxoff = entryvec->n - 1;
	OffsetNumber off;
	Nsrt	   *sorted;
	int			listbytes;
	int			half;

	/* the work array is indexed by offset number, so slot 0 stays unused */
	sorted = (Nsrt *) palloc((maxoff + 1) * sizeof(Nsrt));

	listbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(listbytes);
	v->spl_right = (OffsetNumber *) palloc(listbytes);
	v->spl_nleft = 0;
	v->spl_nright = 0;
	v->spl_ldatum = PointerGetDatum(0);
	v->spl_rdatum = PointerGetDatum(0);

	/* Collect (key, original offset) pairs and sort them by key */
	for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
	{
		sorted[off].i = off;
		sorted[off].t = (GBT_NUMKEY *) DatumGetPointer((entryvec->vector[off].key));
	}
	qsort_arg((void *) &sorted[FirstOffsetNumber],
			  maxoff - FirstOffsetNumber + 1,
			  sizeof(Nsrt),
			  (qsort_arg_comparator) tinfo->f_cmp,
			  (void *) flinfo);

	/* Offsets up to "half" go left, everything after goes right */
	half = (maxoff - FirstOffsetNumber + 1) / 2;

	for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
	{
		Nsrt	   *cur = &sorted[off];

		if (off > half)
		{
			gbt_num_bin_union(&v->spl_rdatum, cur->t, tinfo, flinfo);
			v->spl_right[v->spl_nright++] = cur->i;
		}
		else
		{
			gbt_num_bin_union(&v->spl_ldatum, cur->t, tinfo, flinfo);
			v->spl_left[v->spl_nleft++] = cur->i;
		}
	}

	return v;
}
Exemplo n.º 4
0
/*
 * mcelem_array_selec
 *		Selectivity of an array operator against a constant array, using
 *		most-common-element statistics.
 *
 * The constant is deconstructed, its null elements are dropped, and the
 * remaining elements are sorted with element_compare/cmpfunc.  The actual
 * estimate is then delegated to mcelem_array_contain_overlap_selec (for
 * OID_ARRAY_CONTAINS_OP and OID_ARRAY_OVERLAP_OP) or to
 * mcelem_array_contained_selec (for OID_ARRAY_CONTAINED_OP).  Any other
 * operator raises an ERROR.
 */
static Selectivity
mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry,
				   Datum *mcelem, int nmcelem,
				   float4 *numbers, int nnumbers,
				   float4 *hist, int nhist,
				   Oid operator, FmgrInfo *cmpfunc)
{
	Datum	   *values;
	bool	   *nulls;
	int			nitems;
	int			nkept = 0;
	bool		saw_null = false;
	Selectivity result;
	int			idx;

	/* Break the constant array into its elements */
	deconstruct_array(array,
					  typentry->type_id,
					  typentry->typlen,
					  typentry->typbyval,
					  typentry->typalign,
					  &values, &nulls, &nitems);

	/* Squeeze the non-null elements to the front, remembering any null */
	for (idx = 0; idx < nitems; idx++)
	{
		if (!nulls[idx])
			values[nkept++] = values[idx];
		else
			saw_null = true;
	}

	if (saw_null && operator == OID_ARRAY_CONTAINS_OP)
	{
		/*
		 * "column @> '{anything, null}'" matches nothing.  (For the other two
		 * operators a null in the constant can simply be ignored.)
		 */
		result = (Selectivity) 0.0;
	}
	else
	{
		/* Sorting exposes unique elements and allows merging with MCELEM */
		qsort_arg(values, nkept, sizeof(Datum),
				  element_compare, cmpfunc);

		if (operator == OID_ARRAY_CONTAINED_OP)
			result = mcelem_array_contained_selec(mcelem, nmcelem,
												  numbers, nnumbers,
												  values, nkept,
												  hist, nhist,
												  operator, cmpfunc);
		else if (operator == OID_ARRAY_CONTAINS_OP ||
				 operator == OID_ARRAY_OVERLAP_OP)
			result = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
														numbers, nnumbers,
														values, nkept,
														operator, cmpfunc);
		else
		{
			elog(ERROR, "arraycontsel called for unrecognized operator %u",
				 operator);
			result = 0.0;		/* keep compiler quiet */
		}
	}

	pfree(values);
	pfree(nulls);

	return result;
}
Exemplo n.º 5
0
/*
 * ndistinct_for_combination
 *		Estimate the number of distinct value combinations over k columns.
 *
 * combination[] holds k indexes into stats[]; for every sample row the values
 * of those columns are gathered, the rows are sorted on all k dimensions, and
 * a single pass over the sorted rows counts
 *		d  - the number of distinct combinations, and
 *		f1 - the number of combinations seen exactly once.
 * Both are passed to estimate_ndistinct() together with totalrows and numrows.
 * (The pre-existing header describes that estimator as the one used by
 * compute_scalar_stats() in ANALYZE, n*d / (n - f1 + f1*n/N).)
 */
static double
ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
						  VacAttrStats **stats, int k, int *combination)
{
	MultiSortSupport mss;
	SortItem   *items;
	Datum	   *valbuf;
	bool	   *nullbuf;
	int			row;
	int			dim;
	int			distinct;
	int			singles;
	int			run_len;

	mss = multi_sort_init(k);

	/*
	 * Build separate value/isnull arrays holding k slots per sample row and
	 * point every SortItem at its own slice.  Sorting a copy is simpler and
	 * less bug-prone than trying to sort the sample in place.
	 */
	items = (SortItem *) palloc(numrows * sizeof(SortItem));
	valbuf = (Datum *) palloc0(sizeof(Datum) * numrows * k);
	nullbuf = (bool *) palloc0(sizeof(bool) * numrows * k);

	for (row = 0; row < numrows; row++)
	{
		items[row].values = &valbuf[row * k];
		items[row].isnull = &nullbuf[row * k];
	}

	/* Per dimension: set up the ordering operator, then load the column */
	for (dim = 0; dim < k; dim++)
	{
		VacAttrStats *colstat = stats[combination[dim]];
		TypeCacheEntry *type = lookup_type_cache(colstat->attrtypid,
												 TYPECACHE_LT_OPR);

		if (type->lt_opr == InvalidOid) /* shouldn't happen */
			elog(ERROR, "cache lookup failed for ordering operator for type %u",
				 colstat->attrtypid);

		multi_sort_add_dimension(mss, dim, type->lt_opr);

		for (row = 0; row < numrows; row++)
			items[row].values[dim] = heap_getattr(rows[row],
												  colstat->attr->attnum,
												  colstat->tupDesc,
												  &items[row].isnull[dim]);
	}

	/* Equal combinations become adjacent once the items are sorted */
	qsort_arg((void *) items, numrows, sizeof(SortItem),
			  multi_sort_compare, mss);

	/*
	 * Walk the sorted items.  run_len is the length of the current run of
	 * equal combinations; whenever a run ends with length 1 it contributes
	 * to "singles" (f1).
	 */
	distinct = 1;
	singles = 0;
	run_len = 1;
	for (row = 1; row < numrows; row++)
	{
		if (multi_sort_compare(&items[row], &items[row - 1], mss) == 0)
		{
			run_len++;
			continue;
		}

		/* a new combination starts here, so close the previous run */
		if (run_len == 1)
			singles++;
		distinct++;
		run_len = 1;
	}

	/* account for the final run */
	if (run_len == 1)
		singles++;

	return estimate_ndistinct(totalrows, numrows, distinct, singles);
}
Exemplo n.º 6
0
/*
 * qsort_arg
 *		Sort an array in place, using a comparator that receives a
 *		caller-supplied pass-through argument.
 *
 * a	- base address of the array
 * n	- number of elements
 * es	- size of one element, in bytes
 * cmp	- comparator, invoked as cmp(x, y, arg); a result < 0, == 0 or > 0
 *		  is treated as x sorting before, equal to, or after y
 * arg	- opaque pointer forwarded unchanged to every cmp call
 *
 * Arrays of fewer than 7 elements are insertion-sorted.  Larger inputs are
 * first checked for already being in order (one linear pass) and returned
 * untouched if so.  Otherwise a pivot is chosen with med3(), moved to the
 * front, and the array is three-way partitioned: elements equal to the pivot
 * are parked at both ends during the scan and swapped into the middle
 * afterwards, leaving a "less than" and a "greater than" partition to sort.
 *
 * Byte counts are kept in size_t so that arrays larger than INT_MAX bytes are
 * not truncated.  Only the smaller partition is sorted by a recursive call;
 * the larger one is handled by looping, which bounds the recursion depth by
 * log2(n) even for badly skewed partitions.
 *
 * SWAPINIT, swap, vecswap and med3 are helpers defined elsewhere in the file;
 * the swap macros rely on the local variable "swaptype" that SWAPINIT sets.
 */
void
qsort_arg(void *a, size_t n, size_t es, qsort_arg_comparator cmp, void *arg)
{
	char	   *pa,
			   *pb,
			   *pc,
			   *pd,
			   *pl,
			   *pm,
			   *pn;
	size_t		d1,
				d2;
	int			r,
				swaptype,
				presorted;

loop:SWAPINIT(a, es);
	if (n < 7)
	{
		/* small input: plain insertion sort */
		for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
			for (pl = pm; pl > (char *) a && cmp(pl - es, pl, arg) > 0;
				 pl -= es)
				swap(pl, pl - es);
		return;
	}

	/* already in order?  then there is nothing to do */
	presorted = 1;
	for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
	{
		if (cmp(pm - es, pm, arg) > 0)
		{
			presorted = 0;
			break;
		}
	}
	if (presorted)
		return;

	/* choose the pivot: middle element, or a median picked by med3() */
	pm = (char *) a + (n / 2) * es;
	if (n > 7)
	{
		pl = (char *) a;
		pn = (char *) a + (n - 1) * es;
		if (n > 40)
		{
			size_t		d = (n / 8) * es;

			pl = med3(pl, pl + d, pl + 2 * d, cmp, arg);
			pm = med3(pm - d, pm, pm + d, cmp, arg);
			pn = med3(pn - 2 * d, pn - d, pn, cmp, arg);
		}
		pm = med3(pl, pm, pn, cmp, arg);
	}

	/* park the pivot in slot 0 and partition the remainder */
	swap((char *) a, pm);
	pa = pb = (char *) a + es;
	pc = pd = (char *) a + (n - 1) * es;
	for (;;)
	{
		/* advance from the left; pivot-equal elements collect at [a, pa) */
		while (pb <= pc && (r = cmp(pb, a, arg)) <= 0)
		{
			if (r == 0)
			{
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		/* advance from the right; pivot-equal elements collect at (pd, end) */
		while (pb <= pc && (r = cmp(pc, a, arg)) >= 0)
		{
			if (r == 0)
			{
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		pb += es;
		pc -= es;
	}

	/* move the pivot-equal runs from both ends into the middle */
	pn = (char *) a + n * es;
	d1 = Min((size_t) (pa - (char *) a), (size_t) (pb - pa));
	vecswap((char *) a, pb - d1, d1);
	d1 = Min((size_t) (pd - pc), (size_t) (pn - pd) - es);
	vecswap(pb, pn - d1, d1);

	/* byte sizes of the "less than" (left) and "greater than" (right) parts */
	d1 = pb - pa;
	d2 = pd - pc;
	if (d1 <= d2)
	{
		/* Recurse on the (smaller) left partition, iterate on the right */
		if (d1 > es)
			qsort_arg(a, d1 / es, es, cmp, arg);
		if (d2 > es)
		{
			a = pn - d2;
			n = d2 / es;
			goto loop;
		}
	}
	else
	{
		/* Recurse on the (smaller) right partition, iterate on the left */
		if (d2 > es)
			qsort_arg(pn - d2, d2 / es, es, cmp, arg);
		if (d1 > es)
		{
			n = d1 / es;
			goto loop;
		}
	}
}
Exemplo n.º 7
0
/*
 * gbt_var_picksplit
 *		Split the entries of entryvec into a left and a right group.
 *
 * Each key is read with gbt_var_key_readable(); keys whose lower and upper
 * pointers coincide are treated as leaf keys and passed through
 * gbt_var_leaf2node() first.  The keys are then sorted with gbt_vsrt_cmp
 * (given tinfo and collation), the first half of the sorted sequence is
 * assigned to v->spl_left and the rest to v->spl_right, and the union datums
 * are accumulated with gbt_var_bin_union().  When tinfo->trnc is set both
 * union keys are finally truncated to a common length.
 *
 * Returns v, with spl_left/spl_right freshly palloc'd.
 */
GIST_SPLITVEC *
gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,
				  Oid collation, const gbtree_vinfo *tinfo)
{
	OffsetNumber maxoff = entryvec->n - 1;
	OffsetNumber off;
	Vsrt	   *sorted;
	GBT_VARKEY **nodes = NULL;
	int			nnodes = 0;
	int			listbytes;
	int			half;
	gbt_vsrt_arg sortarg;

	/* the work array is indexed by offset number, so slot 0 stays unused */
	sorted = (Vsrt *) palloc((maxoff + 1) * sizeof(Vsrt));

	listbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(listbytes);
	v->spl_right = (OffsetNumber *) palloc(listbytes);
	v->spl_nleft = 0;
	v->spl_nright = 0;
	v->spl_ldatum = PointerGetDatum(0);
	v->spl_rdatum = PointerGetDatum(0);

	/* remembers the keys that gbt_var_leaf2node() had to build anew */
	nodes = palloc(sizeof(bytea *) * (maxoff + 1));

	/* Collect (key, original offset) pairs */
	for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
	{
		GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[off].key);
		GBT_VARKEY_R rd = gbt_var_key_readable(key);

		sorted[off].i = off;

		if (rd.lower != rd.upper)
		{
			/* not a leaf key: use it as it is */
			sorted[off].t = key;
			continue;
		}

		/* leaf key: convert; keep the slot only if a new key came back */
		nodes[nnodes] = gbt_var_leaf2node(key, tinfo);
		sorted[off].t = nodes[nnodes];
		if (nodes[nnodes] != key)
			nnodes++;
	}

	/* Sort the pairs by key */
	sortarg.collation = collation;
	sortarg.tinfo = tinfo;
	qsort_arg((void *) &sorted[FirstOffsetNumber],
			  maxoff - FirstOffsetNumber + 1,
			  sizeof(Vsrt),
			  gbt_vsrt_cmp,
			  (void *) &sortarg);

	/* Offsets up to "half" go left, everything after goes right */
	half = (maxoff - FirstOffsetNumber + 1) / 2;

	for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
	{
		Vsrt	   *cur = &sorted[off];

		if (off > half)
		{
			gbt_var_bin_union(&v->spl_rdatum, cur->t, collation, tinfo);
			v->spl_right[v->spl_nright++] = cur->i;
		}
		else
		{
			gbt_var_bin_union(&v->spl_ldatum, cur->t, collation, tinfo);
			v->spl_left[v->spl_nleft++] = cur->i;
		}
	}

	/* Truncate (=compress) both union keys to one shared length */
	if (tinfo->trnc)
	{
		GBT_VARKEY *lkey = (GBT_VARKEY *) DatumGetPointer(v->spl_ldatum);
		GBT_VARKEY *rkey = (GBT_VARKEY *) DatumGetPointer(v->spl_rdatum);
		int32		llen = gbt_var_node_cp_len(lkey, tinfo);
		int32		rlen = gbt_var_node_cp_len(rkey, tinfo);
		int32		keep = Max(llen, rlen) + 1;
		GBT_VARKEY *ltrunc = gbt_var_node_truncate(lkey, keep, tinfo);
		GBT_VARKEY *rtrunc = gbt_var_node_truncate(rkey, keep, tinfo);

		v->spl_ldatum = PointerGetDatum(ltrunc);
		v->spl_rdatum = PointerGetDatum(rtrunc);
	}

	return v;
}
Exemplo n.º 8
0
/*
 * dependency_degree
 *		Validate a functional dependency against the sample data.
 *
 * Given k attributes (dependency[0 .. k-1], each an index into stats[] and
 * into the ordered members of attrs), measure how well the first (k-1)
 * attributes determine the last one.
 *
 * numrows/rows	- the sample tuples
 * k			- number of attributes in the dependency (at least 2)
 * dependency	- indexes of the attributes, determined column last
 * stats		- per-column statistics entries (type and tuple descriptor)
 * attrs		- set of attribute numbers covered by the statistics object
 *
 * Returns the fraction of sample rows that belong to groups consistent with
 * the dependency (supporting rows / numrows), or 0.0 when there are no
 * sample rows at all.
 */
static double
dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
				  VacAttrStats **stats, Bitmapset *attrs)
{
	int			i,
				j;
	int			nvalues = numrows * k;
	MultiSortSupport mss;
	SortItem   *items;
	Datum	   *values;
	bool	   *isnull;
	int		   *attnums;

	/* counters valid within a group */
	int			group_size = 0;
	int			n_violations = 0;

	/* total number of rows supporting (consistent with) the dependency */
	int			n_supporting_rows = 0;

	/* Make sure we have at least two input attributes. */
	Assert(k >= 2);

	/*
	 * Without sample rows there is nothing to validate, and the final
	 * division would be 0/0.  Report "no support" instead.
	 */
	if (numrows <= 0)
		return 0.0;

	/* sort info for all attributes columns */
	mss = multi_sort_init(k);

	/* data for the sort */
	items = (SortItem *) palloc(numrows * sizeof(SortItem));
	values = (Datum *) palloc(sizeof(Datum) * nvalues);
	isnull = (bool *) palloc(sizeof(bool) * nvalues);

	/* fix the pointers to values/isnull: each item owns k consecutive slots */
	for (i = 0; i < numrows; i++)
	{
		items[i].values = &values[i * k];
		items[i].isnull = &isnull[i * k];
	}

	/*
	 * Transform the bms into an array, to make accessing i-th member easier.
	 */
	attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs));
	i = 0;
	j = -1;
	while ((j = bms_next_member(attrs, j)) >= 0)
		attnums[i++] = j;

	/*
	 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
	 *
	 * (a) sort the data lexicographically
	 *
	 * (b) split the data into groups by first (k-1) columns
	 *
	 * (c) for each group count different values in the last column
	 */

	/* prepare the sort function for each dimension, and the SortItem array */
	for (i = 0; i < k; i++)
	{
		VacAttrStats *colstat = stats[dependency[i]];
		TypeCacheEntry *type;

		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
		if (type->lt_opr == InvalidOid) /* shouldn't happen */
			elog(ERROR, "cache lookup failed for ordering operator for type %u",
				 colstat->attrtypid);

		/* prepare the sort function for this dimension */
		multi_sort_add_dimension(mss, i, type->lt_opr);

		/*
		 * Fetch this column's value from every sample row.  The tuple
		 * descriptor is taken from the same stats entry that supplied the
		 * column's type, so attribute and descriptor always belong together.
		 */
		for (j = 0; j < numrows; j++)
		{
			items[j].values[i] =
				heap_getattr(rows[j], attnums[dependency[i]],
							 colstat->tupDesc, &items[j].isnull[i]);
		}
	}

	/* sort the items so that we can detect the groups */
	qsort_arg((void *) items, numrows, sizeof(SortItem),
			  multi_sort_compare, mss);

	/*
	 * Walk through the sorted array, split it into groups according to the
	 * first (k-1) columns. If there's a single value in the last column, we
	 * count the group as 'supporting' the functional dependency. Otherwise we
	 * count it as contradicting.
	 */

	/* start with the first row forming a group */
	group_size = 1;

	/* loop 1 beyond the end of the array so that we count the final group */
	for (i = 1; i <= numrows; i++)
	{
		/*
		 * Check if the group ended, which may be either because we processed
		 * all the items (i==numrows), or because the i-th item is not equal
		 * to the preceding one.
		 */
		if (i == numrows ||
			multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
		{
			/*
			 * If no violations were found in the group then track the rows of
			 * the group as supporting the functional dependency.
			 */
			if (n_violations == 0)
				n_supporting_rows += group_size;

			/* Reset counters for the new group */
			n_violations = 0;
			group_size = 1;
			continue;
		}
		/* first columns match, but the last one does not (so contradicting) */
		else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
			n_violations++;

		group_size++;
	}

	/* release all working storage, including the attnum lookup array */
	pfree(items);
	pfree(values);
	pfree(isnull);
	pfree(mss);
	pfree(attnums);

	/* Compute the 'degree of validity' as (supporting/total). */
	return (n_supporting_rows * 1.0 / numrows);
}
Exemplo n.º 9
0
/*
 * uniqueentry
 *		Sort the WordEntryIN array a[0 .. l-1] and collapse entries that carry
 *		the same lexeme.
 *
 * Lexeme text lives in buf, addressed through entry.pos / entry.len.  When
 * duplicates carry position lists those lists are combined into the surviving
 * entry, and every surviving list is cleaned up with uniquePos().
 *
 * *outbuflen receives the amount of space needed for strings and positions.
 * The return value is the number of entries left at the front of a[].
 */
static int
uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
{
	WordEntryIN *kept = a;		/* last distinct entry produced so far */
	WordEntryIN *scan;			/* entry currently being examined */
	int			space = 0;

	Assert(l >= 1);

	if (l > 1)
		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
				  (void *) buf);

	for (scan = a + 1; scan - a < l; scan++)
	{
		int			samelexeme;

		samelexeme = (scan->entry.len == kept->entry.len &&
					  strncmp(&buf[scan->entry.pos], &buf[kept->entry.pos],
							  kept->entry.len) == 0);

		if (samelexeme)
		{
			/* a duplicate without positions contributes nothing */
			if (!scan->entry.haspos)
				continue;

			if (!kept->entry.haspos)
			{
				/* the kept entry had no positions yet: adopt scan's list */
				kept->entry.haspos = 1;
				kept->pos = scan->pos;
				kept->poslen = scan->poslen;
			}
			else
			{
				/* tack scan's positions onto the end of kept's list */
				int			total = scan->poslen + kept->poslen;

				kept->pos = (WordEntryPos *)
					repalloc(kept->pos, total * sizeof(WordEntryPos));
				memcpy(&kept->pos[kept->poslen], scan->pos,
					   scan->poslen * sizeof(WordEntryPos));
				kept->poslen = total;
				pfree(scan->pos);
			}
			continue;
		}

		/* a different lexeme: *kept is complete, count the space it needs */
		space += kept->entry.len;
		if (kept->entry.haspos)
		{
			kept->poslen = uniquePos(kept->pos, kept->poslen);
			space = SHORTALIGN(space);
			space += kept->poslen * sizeof(WordEntryPos) + sizeof(uint16);
		}

		/* ... and move the new lexeme into the next output slot */
		kept++;
		if (kept != scan)
			memcpy(kept, scan, sizeof(WordEntryIN));
	}

	/* the final entry has not been accounted for yet */
	space += kept->entry.len;
	if (kept->entry.haspos)
	{
		kept->poslen = uniquePos(kept->pos, kept->poslen);
		space = SHORTALIGN(space);
		space += kept->poslen * sizeof(WordEntryPos) + sizeof(uint16);
	}

	*outbuflen = space;
	return kept + 1 - a;
}
Exemplo n.º 10
0
/*
 * tsvectorrecv
 *		Build a tsvector from the message in the StringInfo passed as
 *		argument 0.
 *
 * The message is read as: an int32 entry count, then per entry a
 * null-terminated lexeme, a uint16 position count, and that many
 * WordEntryPos values.
 *
 * The result starts with the header and the WordEntry array (hdrlen bytes),
 * followed by a variable-size area holding the lexeme bytes and, for entries
 * with positions, a 2-byte-aligned uint16 count plus the positions.  datalen
 * tracks the bytes used in that area and must match exactly what is written,
 * because it drives both the buffer growth check and the final VARSIZE.
 *
 * Raises ERROR for a bad entry count, an over-long lexeme, too much lexeme
 * data, too many positions, or positions that are not strictly increasing.
 * Entries that do not arrive in strictly increasing WordEntryCMP order cause
 * the entry array to be re-sorted before returning.
 */
Datum
tsvectorrecv(PG_FUNCTION_ARGS)
{
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
	TSVector	vec;
	int			i;
	int32		nentries;
	int			datalen;		/* number of bytes used in the variable size
								 * area after fixed size TSVector header and
								 * WordEntries */
	Size		hdrlen;
	Size		len;			/* allocated size of vec */
	bool		needSort = false;

	nentries = pq_getmsgint(buf, sizeof(int32));
	if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
		elog(ERROR, "invalid size of tsvector");

	hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries;

	len = hdrlen * 2;			/* times two to make room for lexemes */
	vec = (TSVector) palloc0(len);
	vec->size = nentries;

	datalen = 0;
	for (i = 0; i < nentries; i++)
	{
		const char *lexeme;
		uint16		npos;
		size_t		lex_len;

		lexeme = pq_getmsgstring(buf);
		npos = (uint16) pq_getmsgint(buf, sizeof(uint16));

		/* sanity checks */

		lex_len = strlen(lexeme);
		if (lex_len > MAXSTRLEN)
			elog(ERROR, "invalid tsvector: lexeme too long");

		if (datalen > MAXSTRPOS)
			elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded");

		if (npos > MAXNUMPOS)
			elog(ERROR, "unexpected number of tsvector positions");

		/*
		 * Looks valid. Fill the WordEntry struct, and copy lexeme.
		 *
		 * But make sure the buffer is large enough first: the lexeme, padding
		 * to 2-byte alignment, the uint16 position count and the positions
		 * themselves must all fit.
		 */
		while (hdrlen + SHORTALIGN(datalen + lex_len) +
			   sizeof(uint16) + npos * sizeof(WordEntryPos) >= len)
		{
			len *= 2;
			vec = (TSVector) repalloc(vec, len);
		}

		vec->entries[i].haspos = (npos > 0) ? 1 : 0;
		vec->entries[i].len = lex_len;
		vec->entries[i].pos = datalen;

		memcpy(STRPTR(vec) + datalen, lexeme, lex_len);

		datalen += lex_len;

		/* anything not strictly greater than its predecessor forces a sort */
		if (i > 0 && WordEntryCMP(&vec->entries[i],
								  &vec->entries[i - 1],
								  STRPTR(vec)) <= 0)
			needSort = true;

		/* Receive positions */
		if (npos > 0)
		{
			uint16		j;
			WordEntryPos *wepptr;

			/*
			 * Pad to 2-byte alignment if necessary. Though we used palloc0
			 * for the initial allocation, subsequent repalloc'd memory areas
			 * are not initialized to zero.
			 */
			if (datalen != SHORTALIGN(datalen))
			{
				*(STRPTR(vec) + datalen) = '\0';
				datalen = SHORTALIGN(datalen);
			}

			/* the position count is stored right in front of the positions */
			memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));

			wepptr = POSDATAPTR(vec, &vec->entries[i]);
			for (j = 0; j < npos; j++)
			{
				wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
				if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
					elog(ERROR, "position information is misordered");
			}

			/*
			 * Advance by exactly what was written: the uint16 count plus npos
			 * positions.  Counting in units of sizeof(WordEntry) here would
			 * push datalen, and with it the final VARSIZE, past the space
			 * that the growth check above guaranteed.
			 */
			datalen += sizeof(uint16) + npos * sizeof(WordEntryPos);
		}
	}

	SET_VARSIZE(vec, hdrlen + datalen);

	if (needSort)
		qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry),
				  compareentry, (void *) STRPTR(vec));

	PG_RETURN_TSVECTOR(vec);
}
Exemplo n.º 11
0
/*
 * spg_range_quad_picksplit
 *		SP-GiST picksplit function for ranges.
 *
 * Argument 0 is the spgPickSplitIn describing the tuples to distribute,
 * argument 1 the spgPickSplitOut to fill in.
 *
 * The bounds of all non-empty input ranges are collected and sorted; the
 * "centroid" is the range built from the median lower bound and the median
 * upper bound.  Every input range is then mapped to node (quadrant - 1),
 * where the quadrant is computed by getQuadrant() relative to the centroid.
 *
 * If every input range is empty, no centroid can be built: the result is a
 * two-node inner tuple without prefix, with all ranges placed in node 0.
 */
Datum
spg_range_quad_picksplit(PG_FUNCTION_ARGS)
{
	spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
	spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
	TypeCacheEntry *typcache;
	RangeBound *lowers;
	RangeBound *uppers;
	RangeType  *centroid;
	int			nfilled = 0;
	int			median;
	int			t;
	bool		isempty;

	typcache = range_get_typcache(fcinfo,
						  RangeTypeGetOid(DatumGetRangeType(in->datums[0])));

	/* Room for one lower and one upper bound per input tuple */
	lowers = palloc(sizeof(RangeBound) * in->nTuples);
	uppers = palloc(sizeof(RangeBound) * in->nTuples);

	/*
	 * Deserialize every range.  The slot is only advanced for non-empty
	 * ranges, so the bounds of an empty range are overwritten by the next one.
	 */
	for (t = 0; t < in->nTuples; t++)
	{
		range_deserialize(typcache, DatumGetRangeType(in->datums[t]),
						  &lowers[nfilled], &uppers[nfilled], &isempty);
		if (!isempty)
			nfilled++;
	}

	if (nfilled == 0)
	{
		/*
		 * All the ranges are empty. The best we can do is to construct an
		 * inner node with no centroid, and put all ranges into node 0. If
		 * non-empty ranges are added later, they will be routed to node 1.
		 */
		out->hasPrefix = false;
		out->prefixDatum = PointerGetDatum(NULL);	/* prefix is empty */
		out->nNodes = 2;
		out->nodeLabels = NULL;

		out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
		out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);

		for (t = 0; t < in->nTuples; t++)
		{
			RangeType  *range = DatumGetRangeType(in->datums[t]);

			out->mapTuplesToNodes[t] = 0;
			out->leafTupleDatums[t] = RangeTypeGetDatum(range);
		}

		PG_RETURN_VOID();
	}

	/* Order both bound arrays so that their medians can be picked */
	qsort_arg(lowers, nfilled, sizeof(RangeBound),
			  bound_cmp, typcache);
	qsort_arg(uppers, nfilled, sizeof(RangeBound),
			  bound_cmp, typcache);

	/* The centroid combines the median lower and the median upper bound */
	median = nfilled / 2;
	centroid = range_serialize(typcache, &lowers[median],
							   &uppers[median], false);
	out->hasPrefix = true;
	out->prefixDatum = RangeTypeGetDatum(centroid);

	/* Only a root-level tuple gets the extra fifth node for empty ranges */
	if (in->level == 0)
		out->nNodes = 5;
	else
		out->nNodes = 4;
	out->nodeLabels = NULL;		/* node labels are not needed */

	out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
	out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);

	/* Route every range to the node of its quadrant around the centroid */
	for (t = 0; t < in->nTuples; t++)
	{
		RangeType  *range = DatumGetRangeType(in->datums[t]);
		int16		quadrant = getQuadrant(typcache, centroid, range);

		out->mapTuplesToNodes[t] = quadrant - 1;
		out->leafTupleDatums[t] = RangeTypeGetDatum(range);
	}

	PG_RETURN_VOID();
}
Exemplo n.º 12
0
/*
 * ginExtractEntries
 *		Extract the index key values from an indexable item.
 *
 * ginstate/attnum	- identify the opclass support functions (attnum is
 *					  1-based)
 * value/isNull		- the item to index
 * nentries			- output: number of keys returned
 * categories		- output: one GinNullCategory per returned key
 *
 * A null item yields a single GIN_CAT_NULL_ITEM placeholder and an item
 * without keys a single GIN_CAT_EMPTY_ITEM placeholder.  Otherwise the keys
 * produced by the opclass's extractValueFn are returned sorted, with
 * duplicates removed, which avoids generating redundant index entries.
 */
Datum *
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
				  Datum value, bool isNull,
				  int32 *nentries, GinNullCategory **categories)
{
	Datum	   *keys;
	bool	   *flags;
	keyEntryData *pairs;
	cmpEntriesArg sortarg;
	int32		src;

	/* The extractValueFn is never called on a null item */
	if (isNull)
	{
		keys = (Datum *) palloc(sizeof(Datum));
		keys[0] = (Datum) 0;
		*categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
		(*categories)[0] = GIN_CAT_NULL_ITEM;
		*nentries = 1;
		return keys;
	}

	/* Ask the opclass; it may or may not hand back a null-flags array */
	flags = NULL;
	keys = (Datum *)
		DatumGetPointer(FunctionCall3(&ginstate->extractValueFn[attnum - 1],
									  value,
									  PointerGetDatum(nentries),
									  PointerGetDatum(&flags)));

	/* No keys at all: substitute an "empty item" placeholder */
	if (keys == NULL || *nentries <= 0)
	{
		keys = (Datum *) palloc(sizeof(Datum));
		keys[0] = (Datum) 0;
		*categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
		(*categories)[0] = GIN_CAT_EMPTY_ITEM;
		*nentries = 1;
		return keys;
	}

	/*
	 * Without a flags array every key is taken to be non-null.  With one,
	 * squash each flag to exactly 0 or 1 so that the array can double as the
	 * GinNullCategory array (this relies on the two being binary compatible).
	 */
	if (flags != NULL)
	{
		for (src = 0; src < *nentries; src++)
			flags[src] = (flags[src] != 0);
	}
	else
		flags = (bool *) palloc0(*nentries * sizeof(bool));

	*categories = (GinNullCategory *) flags;

	/* A single key is trivially sorted and unique */
	if (*nentries <= 1)
		return keys;

	/*
	 * Sort (datum, isnull) pairs in a scratch array and copy them back.
	 *
	 * XXX Using qsort here is notationally painful, and the overhead is
	 * pretty bad too.  For small numbers of keys it'd likely be better to
	 * use a simple insertion sort.
	 */
	pairs = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData));
	for (src = 0; src < *nentries; src++)
	{
		pairs[src].datum = keys[src];
		pairs[src].isnull = flags[src];
	}

	sortarg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
	sortarg.haveDups = false;
	qsort_arg(pairs, *nentries, sizeof(keyEntryData),
			  cmpEntries, (void *) &sortarg);

	if (!sortarg.haveDups)
	{
		/* no duplicates were flagged: copy everything back in sorted order */
		for (src = 0; src < *nentries; src++)
		{
			keys[src] = pairs[src].datum;
			flags[src] = pairs[src].isnull;
		}
	}
	else
	{
		/* keep the first pair, then every pair that differs from its predecessor */
		int32		dst = 1;

		keys[0] = pairs[0].datum;
		flags[0] = pairs[0].isnull;
		for (src = 1; src < *nentries; src++)
		{
			if (cmpEntries(&pairs[src - 1], &pairs[src], &sortarg) == 0)
				continue;

			keys[dst] = pairs[src].datum;
			flags[dst] = pairs[src].isnull;
			dst++;
		}
		*nentries = dst;
	}

	pfree(pairs);

	return keys;
}
Exemplo n.º 13
0
/*
 * compact_set
 *      Bring an element set into fully sorted, duplicate-free form.
 *
 * The set keeps nsorted already-sorted items at the front of eset->data,
 * followed by (nall - nsorted) items that were appended unsorted.  This
 * routine sorts the appended items, drops duplicates among them, and then
 * merges them with the sorted prefix (again dropping duplicates), so that
 * afterwards nall == nsorted.
 *
 * When need_space is true and less than ARRAY_FREE_FRACT of the buffer is
 * free, the buffer is enlarged as well.
 */
static void
compact_set(element_set_t * eset, bool need_space)
{
    double    unused_ratio;

    Assert(eset->nall > 0);
    Assert(eset->data != NULL);
    Assert(eset->nsorted <= eset->nall);
    Assert(eset->nall * eset->item_size <= eset->nbytes);

    /* nothing to sort or merge unless items were appended */
    if (eset->nall > eset->nsorted)
    {
        char   *fresh = eset->data + (eset->nsorted * eset->item_size);
        char   *tail = fresh;   /* last unique item kept so far */
        int     kept = 1;       /* the first appended item is always kept */
        int     idx;

        /*
         * Sort the appended items.
         *
         * TODO Consider an insertion sort for a small number of items
         * (for <64 items it might be faster than qsort)
         */
        qsort_arg(fresh,
                  eset->nall - eset->nsorted, eset->item_size,
                  compare_items, &eset->item_size);

        /*
         * Drop duplicates: an item survives only if it differs from the
         * last survivor, and survivors are packed towards the front.
         */
        for (idx = 1; idx < eset->nall - eset->nsorted; idx++)
        {
            char   *cand = fresh + (idx * eset->item_size);

            if (memcmp(tail, cand, eset->item_size) == 0)
                continue;       /* same as the last survivor */

            tail += eset->item_size;
            kept++;

            /* only copy if really needed */
            if (tail != cand)
                memcpy(tail, cand, eset->item_size);
        }

        /* the appended part shrank to its unique items */
        eset->nall = eset->nsorted + kept;

        /* with no sorted prefix, the appended part simply becomes it */
        if (eset->nsorted == 0)
            eset->nsorted = eset->nall;

        /*
         * TODO The merge could be postponed when removing duplicates freed
         *      enough space, and only done on the last call (when finalizing
         *      the aggregate). OTOH in that case the merge is cheap anyway,
         *      as few new items remain - but the data still has to be
         *      shuffled around, which wastes memory bandwidth.
         */

        /* otherwise merge prefix and appended part into a new buffer */
        if (eset->nsorted < eset->nall)
        {
            MemoryContext   oldctx = MemoryContextSwitchTo(eset->aggctx);
            char           *merged = palloc(eset->nbytes);
            char           *out = merged;

            /* the sorted prefix */
            char           *a = eset->data;
            char           *a_max = eset->data + eset->nsorted * eset->item_size;

            /* the sorted, deduplicated appended items */
            char           *b = eset->data + (eset->nsorted * eset->item_size);
            char           *b_max = eset->data + eset->nall * eset->item_size;

            MemoryContextSwitchTo(oldctx);

            /*
             * TODO If the two runs do not overlap at all, i.e.
             *
             *        (a_first > b_last) || (a_last < b_first),
             *
             *      they could simply be copied. This is probably very
             *      unlikely to happen in practice, though.
             */

            /*
             * Both runs are non-empty here.  Emit the smaller head each
             * time; when the heads are equal emit one copy and advance both.
             */
            while ((a != a_max) && (b != b_max))
            {
                int cmp = memcmp(a, b, eset->item_size);

                if (cmp <= 0)
                {
                    memcpy(out, a, eset->item_size);
                    a += eset->item_size;
                    if (cmp == 0)
                        b += eset->item_size;
                }
                else
                {
                    memcpy(out, b, eset->item_size);
                    b += eset->item_size;
                }

                out += eset->item_size;
            }

            /* one run is exhausted; append whatever is left of the other */
            if (a != a_max)
            {
                memcpy(out, a, a_max - a);
                out += (a_max - a);
            }
            else if (b != b_max)
            {
                memcpy(out, b, b_max - b);
                out += (b_max - b);
            }

            Assert((out - merged) <= (eset->nall * eset->item_size));

            /*
             * The merge may have removed duplicates shared by both runs, so
             * recompute the counts from the bytes actually written.
             */
            eset->nsorted = (out - merged) / eset->item_size;
            eset->nall = eset->nsorted;
            pfree(eset->data);
            eset->data = merged;
        }
    }

    Assert(eset->nall == eset->nsorted);

    /* fraction of the buffer that is currently unused */
    unused_ratio
        = (eset->nbytes - eset->nall * eset->item_size) * 1.0 / eset->nbytes;

    /*
     * Nothing more to do unless the caller wants room for more items (it
     * does not when finalizing the aggregate result) and too little is free.
     */
    if (!need_space || !(unused_ratio < ARRAY_FREE_FRACT))
        return;

    /*
     * For small requests, we simply double the array size, because that's
     * what AllocSet will give us anyway. No point in trying to save memory
     * by growing the array slower.
     *
     * After reaching ALLOCSET_SEPARATE_THRESHOLD, the memory is allocated
     * in separate blocks, thus we can be smarter and grow the memory a bit
     * slower (just enough to get the 20% free space).
     *
     * XXX If the memory context uses smaller blocks, the switch to special
     * blocks may happen before ALLOCSET_SEPARATE_THRESHOLD. This limit is
     * simply a global guarantee for all possible AllocSets.
     */
    if ((eset->nbytes / 0.8) < ALLOCSET_SEPARATE_THRESHOLD)
        eset->nbytes *= 2;
    else
        eset->nbytes /= 0.8;

    eset->data = repalloc(eset->data, eset->nbytes);
}
Exemplo n.º 14
0
/*
 * Quicksort of the n elements of size es that start at a, ordered by cmp.
 *
 * NOTE(review): this excerpt starts in the middle of a preprocessor
 * conditional -- the #if matching the #endif below is not visible here, and
 * neither is the declaration of "arg", which the CMP() and med3() calls in
 * the body use.  Presumably the hidden branch declares a qsort_arg-style
 * signature that has an "arg" parameter (compare the I_AM_QSORT_ARG test
 * further down); confirm against the full file.
 *
 * Outline of the body:
 *  - fewer than 7 elements: insertion sort;
 *  - otherwise choose a pivot via med3(), move it to the front and
 *    three-way partition the rest, parking elements equal to the pivot at
 *    both ends of the array;
 *  - if the partitioning pass performed no swap at all, finish with an
 *    insertion sort;
 *  - otherwise swap the pivot-equal runs into the middle, recurse on the
 *    left partition and loop (goto) on the right one.
 */
void BLC_PREFIX(qsort)(void *a, size_t n, size_t es, cmp_t cmp)
#endif
{
	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
	size_t d, r;
	int cmp_result;
	int swaptype, swap_cnt;

loop:	SWAPINIT(a, es);
	swap_cnt = 0;
	/* Small input: insertion sort */
	if (n < 7) {
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm; 
			     pl > (char *)a && CMP(arg, pl - es, pl) > 0;
			     pl -= es)
				swap(pl, pl - es);
		return;
	}
	/* Pivot candidate: the middle element, refined through med3() below */
	pm = (char *)a + (n / 2) * es;
	if (n > 7) {
		pl = a;
		pn = (char *)a + (n - 1) * es;
		if (n > 40) {
			d = (n / 8) * es;
			pl = med3(pl, pl + d, pl + 2 * d, cmp, arg);
			pm = med3(pm - d, pm, pm + d, cmp, arg);
			pn = med3(pn - 2 * d, pn - d, pn, cmp, arg);
		}
		pm = med3(pl, pm, pn, cmp, arg);
	}
	/* Move the pivot to slot 0; pa/pb scan upwards, pc/pd scan downwards */
	swap(a, pm);
	pa = pb = (char *)a + es;

	pc = pd = (char *)a + (n - 1) * es;
	for (;;) {
		/* Scan from the left; elements equal to the pivot are swapped to pa */
		while (pb <= pc && (cmp_result = CMP(arg, pb, a)) <= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		/* Scan from the right; elements equal to the pivot are swapped to pd */
		while (pb <= pc && (cmp_result = CMP(arg, pc, a)) >= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		swap_cnt = 1;
		pb += es;
		pc -= es;
	}
	/* swap_cnt is still 0 only if the partitioning pass swapped nothing */
	if (swap_cnt == 0) {  /* Switch to insertion sort */
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm; 
			     pl > (char *)a && CMP(arg, pl - es, pl) > 0;
			     pl -= es)
				swap(pl, pl - es);
		return;
	}

	/* Swap the pivot-equal runs parked at both ends into the middle */
	pn = (char *)a + n * es;
	r = min(pa - (char *)a, pb - pa);
	vecswap(a, pb - r, r);
	r = min(pd - pc, pn - pd - es);
	vecswap(pb, pn - r, r);
	/* Left partition: sorted by a recursive call */
	if ((r = pb - pa) > es)
#ifdef I_AM_QSORT_ARG
		qsort_arg(a, r / es, es, cmp, arg);
#else
		qsort(a, r / es, es, cmp);
#endif
	/* Right partition: handled by jumping back to the top */
	if ((r = pd - pc) > es) {
		/* Iterate rather than recurse to save stack space */
		a = pn - r;
		n = r / es;
		goto loop;
	}
/*		qsort(pn - r, r / es, es, cmp);*/
}