Пример #1
0
/*
 * StreamBatchCreate
 *		Allocate and initialize a StreamBatch for the given set of readers.
 *
 * A single allocation holds, back to back: the StreamBatch header, a copy
 * of the readers bitmapset, and an int array with one slot per reader.
 * Each slot is set to the larger of the reader's CQProcEntry proc_runs and
 * pg_size values, in the order bms_first_member() yields the members.
 *
 * NOTE(review): readers is dereferenced unconditionally, so callers are
 * presumably expected to pass a non-NULL set -- confirm.
 */
StreamBatch *StreamBatchCreate(Bitmapset *readers, int num_tuples)
{
	size_t		readers_size = BITMAPSET_SIZE(readers->nwords);
	int			num_readers = bms_num_members(readers);
	char	   *base = ShmemDynAlloc0(sizeof(StreamBatch) + readers_size + (num_readers * sizeof(int)));
	StreamBatch *batch = (StreamBatch *) base;
	Bitmapset  *remaining;
	int			cq_id;
	int			slot;

	batch->id = rand() ^ (int) MyProcPid;
	batch->num_tups = num_tuples;
	/* every reader is counted once per tuple */
	batch->num_wtups = num_readers * num_tuples;
	SpinLockInit(&batch->mutex);

	/* the bitmapset copy lives directly behind the header */
	batch->readers = (Bitmapset *) (base + sizeof(StreamBatch));
	memcpy(batch->readers, readers, readers_size);

	/* ... and the per-reader int array directly behind the bitmapset */
	batch->proc_runs = (int *) (base + sizeof(StreamBatch) + readers_size);

	/*
	 * bms_first_member() is called until it reports -1 on a private copy,
	 * leaving the caller's set untouched.
	 */
	remaining = bms_copy(readers);
	for (slot = 0; (cq_id = bms_first_member(remaining)) != -1; slot++)
	{
		CQProcEntry *pentry = GetCQProcEntry(cq_id);

		batch->proc_runs[slot] = Max(pentry->proc_runs, pentry->pg_size);
	}
	pfree(remaining);

	return batch;
}
Пример #2
0
/*
 * StreamBatchWaitAndRemove
 *		Block until the batch has been fully acked, then free it.
 *
 * The loop polls StreamBatchAllAcked(), sleeping SLEEP_MS milliseconds
 * between polls.  Every CHECK_CRASH_CYCLES polls it also asks
 * num_cq_crashes() how many readers crashed; if some did, the wait ends as
 * soon as all tuples have been read and the acks received cover every
 * reader that did not crash.
 *
 * The batch is released with ShmemDynFree() before returning, so the
 * caller must not touch it afterwards.
 */
void StreamBatchWaitAndRemove(StreamBatch *batch)
{
	int cycle = 0;

	while (!StreamBatchAllAcked(batch))
	{
		if (cycle % CHECK_CRASH_CYCLES == 0)
		{
			int num_crashes = num_cq_crashes(batch);

			cycle = 0;

			/*
			 * All tuples have been read, and we've received acks from all
			 * workers that didn't crash.
			 *
			 * The no-crash case must fall through to the sleep below rather
			 * than restart the loop: skipping the sleep and the counter
			 * increment would leave cycle at 0 and turn this into a busy
			 * loop that re-runs the crash check on every iteration.
			 */
			if (num_crashes > 0 &&
				StreamBatchAllRead(batch) &&
				batch->num_wacks >= (batch->num_tups * (bms_num_members(batch->readers) - num_crashes)))
				break;
		}

		pg_usleep(SLEEP_MS * 1000);

		cycle++;
	}

	ShmemDynFree(batch);
}
Пример #3
0
/*
 * statext_ndistinct_build
 *		Build ndistinct items for every attribute combination of size >= 2.
 *
 * For each combination size from 2 up to the number of attributes, every
 * combination produced by the generator becomes one MVNDistinctItem: its
 * attrs set holds the attnums of the chosen columns and its ndistinct value
 * comes from ndistinct_for_combination().  The result array is sized up
 * front from num_combinations() and must be filled exactly.
 */
MVNDistinct *
statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
						Bitmapset *attrs, VacAttrStats **stats)
{
	int			nattrs = bms_num_members(attrs);
	int			ncombs = num_combinations(nattrs);
	int			filled = 0;
	int			width;
	MVNDistinct *ndist;

	ndist = palloc(offsetof(MVNDistinct, items) +
				   ncombs * sizeof(MVNDistinctItem));
	ndist->magic = STATS_NDISTINCT_MAGIC;
	ndist->type = STATS_NDISTINCT_TYPE_BASIC;
	ndist->nitems = ncombs;

	for (width = 2; width <= nattrs; width++)
	{
		/* enumerates the combinations of "width" out of "nattrs" elements */
		CombinationGenerator *gen = generator_init(nattrs, width);
		int		   *combo;

		for (combo = generator_next(gen);
			 combo != NULL;
			 combo = generator_next(gen))
		{
			MVNDistinctItem *item = &ndist->items[filled];
			Bitmapset  *members = NULL;
			int			pos;

			/* combo[] holds indexes into stats[]; collect their attnums */
			for (pos = 0; pos < width; pos++)
				members = bms_add_member(members,
										 stats[combo[pos]]->attr->attnum);

			item->attrs = members;
			item->ndistinct =
				ndistinct_for_combination(totalrows, numrows, rows,
										  stats, width, combo);

			filled++;
			Assert(filled <= ndist->nitems);
		}

		generator_free(gen);
	}

	/* the preallocated array must have been consumed exactly */
	Assert(filled == ndist->nitems);

	return ndist;
}
Пример #4
0
/*
 * find_strongest_dependency
 *		pick the strongest dependency fully covered by the given attnums
 *
 * Functional dependencies are applied strongest-first.  A candidate
 * qualifies only if every attribute it references is present in attnums;
 * among qualifying candidates, more attributes wins, and for equal
 * attribute counts a higher degree wins.  On an exact tie the candidate
 * seen later in the array is the one returned.
 *
 * Returns NULL when no dependency is fully matched.  The stats argument is
 * not consulted here.
 */
static MVDependency *
find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
						  Bitmapset *attnums)
{
	MVDependency *best = NULL;
	int			nattnums = bms_num_members(attnums);	/* attnums in clauses */
	int			idx;

	for (idx = 0; idx < dependencies->ndeps; idx++)
	{
		MVDependency *candidate = dependencies->deps[idx];

		/* needs more attributes than we have clauses for: can't match */
		if (candidate->nattributes > nattnums)
			continue;

		/*
		 * Cheap rejection against the current best: fewer attributes, or
		 * the same number of attributes with a strictly lower degree.
		 */
		if (best != NULL &&
			(candidate->nattributes < best->nattributes ||
			 (candidate->nattributes == best->nattributes &&
			  candidate->degree < best->degree)))
			continue;

		/*
		 * The candidate would rank at least as high as the current best;
		 * the full match against attnums is checked last because it is the
		 * more expensive test.
		 */
		if (!dependency_is_fully_matched(candidate, attnums))
			continue;

		best = candidate;
	}

	return best;
}
Пример #5
0
/*
 * statext_ndistinct_serialize
 *		flatten an MVNDistinct into a bytea
 *
 * Layout written after the varlena header:
 *
 *		magic, type, nitems			(one uint32 each)
 *		then, per item:
 *			ndistinct				(double)
 *			number of attributes	(int)
 *			attribute numbers		(one AttrNumber per set member)
 *
 * All fields are copied with memcpy(), so nothing in the output is assumed
 * to be aligned.
 */
bytea *
statext_ndistinct_serialize(MVNDistinct *ndistinct)
{
	Size		len;
	bytea	   *output;
	char	   *cursor;
	int			idx;

	Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
	Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);

	/*
	 * Fixed part: varlena header, the struct's scalar fields, and for every
	 * item its fixed-size leading fields plus the attribute counter.
	 */
	len = VARHDRSZ + SizeOfMVNDistinct +
		ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));

	/* variable part: the attribute numbers of each item */
	for (idx = 0; idx < ndistinct->nitems; idx++)
	{
		int			count = bms_num_members(ndistinct->items[idx].attrs);

		Assert(count >= 2);
		len += sizeof(AttrNumber) * count;
	}

	output = (bytea *) palloc(len);
	SET_VARSIZE(output, len);

	cursor = VARDATA(output);

	/* header fields: magic, type, nitems */
	memcpy(cursor, &ndistinct->magic, sizeof(uint32));
	cursor += sizeof(uint32);
	memcpy(cursor, &ndistinct->type, sizeof(uint32));
	cursor += sizeof(uint32);
	memcpy(cursor, &ndistinct->nitems, sizeof(uint32));
	cursor += sizeof(uint32);

	/* one record per item: estimate, attribute count, attribute numbers */
	for (idx = 0; idx < ndistinct->nitems; idx++)
	{
		MVNDistinctItem *item = &ndistinct->items[idx];
		int			count = bms_num_members(item->attrs);
		int			member;

		memcpy(cursor, &item->ndistinct, sizeof(double));
		cursor += sizeof(double);
		memcpy(cursor, &count, sizeof(int));
		cursor += sizeof(int);

		for (member = bms_next_member(item->attrs, -1);
			 member >= 0;
			 member = bms_next_member(item->attrs, member))
		{
			AttrNumber	attnum = (AttrNumber) member;

			memcpy(cursor, &attnum, sizeof(AttrNumber));
			cursor += sizeof(AttrNumber);
		}

		/* we must never have written past the computed length */
		Assert(cursor <= ((char *) output + len));
	}

	return output;
}
Пример #6
0
/*
 * detects functional dependencies between groups of columns
 *
 * Generates all possible subsets of columns (variations) and computes
 * the degree of validity for each one. For example when creating statistics
 * on three columns (a,b,c) there are 9 possible dependencies
 *
 *	   two columns			  three columns
 *	   -----------			  -------------
 *	   (a) -> b				  (a,b) -> c
 *	   (a) -> c				  (a,c) -> b
 *	   (b) -> a				  (b,c) -> a
 *	   (b) -> c
 *	   (c) -> a
 *	   (c) -> b
 *
 * Returns the collected dependencies, or NULL when every candidate had a
 * degree of exactly 0.0 (such candidates are not stored).
 */
MVDependencies *
statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
						   VacAttrStats **stats)
{
	int			i,
				j,
				k;
	int			numattrs;
	int		   *attnums;

	/* result */
	MVDependencies *dependencies = NULL;

	numattrs = bms_num_members(attrs);

	Assert(numattrs >= 2);

	/*
	 * Transform the bms into an array, to make accessing i-th member easier.
	 */
	attnums = palloc(sizeof(int) * numattrs);
	i = 0;
	j = -1;
	while ((j = bms_next_member(attrs, j)) >= 0)
		attnums[i++] = j;

	/*
	 * We'll try build functional dependencies starting from the smallest ones
	 * covering just 2 columns, to the largest ones, covering all columns
	 * included in the statistics object.  We start from the smallest ones
	 * because we want to be able to skip already implied ones.
	 */
	for (k = 2; k <= numattrs; k++)
	{
		AttrNumber *dependency; /* array with k elements */

		/*
		 * Prepare a generator of variations.  The variable deliberately
		 * does not reuse the type's name, so the type stays visible (and
		 * readable) inside this scope.
		 */
		DependencyGenerator generator = DependencyGenerator_init(numattrs, k);

		/* generate all possible variations of k values (out of n) */
		while ((dependency = DependencyGenerator_next(generator)))
		{
			double		degree;
			MVDependency *d;

			/* compute how valid the dependency seems */
			degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);

			/*
			 * if the dependency seems entirely invalid, don't store it
			 */
			if (degree == 0.0)
				continue;

			d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
										 + k * sizeof(AttrNumber));

			/*
			 * copy the dependency, translating the generator's indexes into
			 * the attribute numbers collected in attnums[]
			 */
			d->degree = degree;
			d->nattributes = k;
			for (i = 0; i < k; i++)
				d->attributes[i] = attnums[dependency[i]];

			/* initialize the list of dependencies */
			if (dependencies == NULL)
			{
				dependencies
					= (MVDependencies *) palloc0(sizeof(MVDependencies));

				dependencies->magic = STATS_DEPS_MAGIC;
				dependencies->type = STATS_DEPS_TYPE_BASIC;
				dependencies->ndeps = 0;
			}

			/*
			 * Grow the array by one slot.  deps[] stores pointers, so each
			 * slot is sizeof(MVDependency *), not the size of the struct.
			 */
			dependencies->ndeps++;
			dependencies = (MVDependencies *) repalloc(dependencies,
													   offsetof(MVDependencies, deps)
													   + dependencies->ndeps * sizeof(MVDependency *));

			dependencies->deps[dependencies->ndeps - 1] = d;
		}

		/*
		 * we're done with variations of k elements, so free the generator
		 */
		DependencyGenerator_free(generator);
	}

	/* the index -> attnum translation table is no longer needed */
	pfree(attnums);

	return dependencies;
}
Пример #7
0
/*
 * validates functional dependency on the data
 *
 * An actual work horse of detecting functional dependencies. Given a variation
 * of k attributes, it checks that the first (k-1) are sufficient to determine
 * the last one.
 *
 * dependency[] holds k indexes; each one selects both an entry of stats[]
 * and a member of attrs (in ascending bitmapset order).  The return value is
 * the fraction of sampled rows that fall into groups without any violation.
 *
 * NOTE(review): numrows is used as the divisor of the result without a
 * check, so callers are presumably expected to pass numrows > 0 -- confirm.
 */
static double
dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
				  VacAttrStats **stats, Bitmapset *attrs)
{
	int			i,
				j;
	int			nvalues = numrows * k;
	MultiSortSupport mss;
	SortItem   *items;
	Datum	   *values;
	bool	   *isnull;
	int		   *attnums;

	/* counters valid within a group */
	int			group_size = 0;
	int			n_violations = 0;

	/* total number of rows supporting (consistent with) the dependency */
	int			n_supporting_rows = 0;

	/* Make sure we have at least two input attributes. */
	Assert(k >= 2);

	/* sort info for all attributes columns */
	mss = multi_sort_init(k);

	/* data for the sort */
	items = (SortItem *) palloc(numrows * sizeof(SortItem));
	values = (Datum *) palloc(sizeof(Datum) * nvalues);
	isnull = (bool *) palloc(sizeof(bool) * nvalues);

	/* fix the pointers to values/isnull */
	for (i = 0; i < numrows; i++)
	{
		items[i].values = &values[i * k];
		items[i].isnull = &isnull[i * k];
	}

	/*
	 * Transform the bms into an array, to make accessing i-th member easier.
	 */
	attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs));
	i = 0;
	j = -1;
	while ((j = bms_next_member(attrs, j)) >= 0)
		attnums[i++] = j;

	/*
	 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
	 *
	 * (a) sort the data lexicographically
	 *
	 * (b) split the data into groups by first (k-1) columns
	 *
	 * (c) for each group count different values in the last column
	 */

	/* prepare the sort function and the SortItem data for each dimension */
	for (i = 0; i < k; i++)
	{
		VacAttrStats *colstat = stats[dependency[i]];
		TypeCacheEntry *type;

		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
		if (type->lt_opr == InvalidOid) /* shouldn't happen */
			elog(ERROR, "cache lookup failed for ordering operator for type %u",
				 colstat->attrtypid);

		/* prepare the sort function for this dimension */
		multi_sort_add_dimension(mss, i, type->lt_opr);

		/*
		 * Fetch this dimension's value from every sampled row.  The tuple
		 * descriptor is taken from the same stats entry that describes the
		 * column being fetched, rather than from an unrelated entry picked
		 * by the loop counter.
		 */
		for (j = 0; j < numrows; j++)
		{
			items[j].values[i] =
				heap_getattr(rows[j], attnums[dependency[i]],
							 colstat->tupDesc, &items[j].isnull[i]);
		}
	}

	/* sort the items so that we can detect the groups */
	qsort_arg((void *) items, numrows, sizeof(SortItem),
			  multi_sort_compare, mss);

	/*
	 * Walk through the sorted array, split it into rows according to the
	 * first (k-1) columns. If there's a single value in the last column, we
	 * count the group as 'supporting' the functional dependency. Otherwise we
	 * count it as contradicting.
	 */

	/* start with the first row forming a group */
	group_size = 1;

	/* loop 1 beyond the end of the array so that we count the final group */
	for (i = 1; i <= numrows; i++)
	{
		/*
		 * Check if the group ended, which may be either because we processed
		 * all the items (i==numrows), or because the i-th item is not equal
		 * to the preceding one.
		 */
		if (i == numrows ||
			multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
		{
			/*
			 * If no violations were found in the group then track the rows of
			 * the group as supporting the functional dependency.
			 */
			if (n_violations == 0)
				n_supporting_rows += group_size;

			/* Reset counters for the new group */
			n_violations = 0;
			group_size = 1;
			continue;
		}
		/* first columns match, but the last one does not (so contradicting) */
		else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
			n_violations++;

		group_size++;
	}

	/* release all scratch memory, including the attnum lookup array */
	pfree(items);
	pfree(values);
	pfree(isnull);
	pfree(mss);
	pfree(attnums);

	/* Compute the 'degree of validity' as (supporting/total). */
	return (n_supporting_rows * 1.0 / numrows);
}