コード例 #1
0
ファイル: lwgeom.c プロジェクト: abuhamid/bd_geonode
/**
 * Set or clear the geodetic flag on a geometry and on the pieces it owns.
 *
 * The flag is written to the geometry itself, to its bounding box when one
 * is attached, and to the point arrays of points, lines and polygon rings.
 * Multi-geometries and collections are handled by recursing into every
 * member. Any other geometry type is reported through lwerror().
 *
 * @param geom  geometry to update in place
 * @param value flag value handed to FLAGS_SET_GEODETIC
 */
void
lwgeom_set_geodetic(LWGEOM *geom, int value)
{
	int idx;

	/* The geometry and its box (if any) always carry the flag */
	FLAGS_SET_GEODETIC(geom->flags, value);
	if ( geom->bbox != NULL )
	{
		FLAGS_SET_GEODETIC(geom->bbox->flags, value);
	}

	if ( geom->type == POINTTYPE )
	{
		LWPOINT *point = (LWPOINT*)geom;

		if ( point->point != NULL )
		{
			FLAGS_SET_GEODETIC(point->point->flags, value);
		}
	}
	else if ( geom->type == LINETYPE )
	{
		LWLINE *line = (LWLINE*)geom;

		if ( line->points != NULL )
		{
			FLAGS_SET_GEODETIC(line->points->flags, value);
		}
	}
	else if ( geom->type == POLYGONTYPE )
	{
		LWPOLY *poly = (LWPOLY*)geom;

		/* Every ring has its own point array and therefore its own flags */
		for ( idx = 0; idx < poly->nrings; idx++ )
		{
			FLAGS_SET_GEODETIC(poly->rings[idx]->flags, value);
		}
	}
	else if ( geom->type == MULTIPOINTTYPE ||
	          geom->type == MULTILINETYPE ||
	          geom->type == MULTIPOLYGONTYPE ||
	          geom->type == COLLECTIONTYPE )
	{
		LWCOLLECTION *coll = (LWCOLLECTION*)geom;

		/* Members are full geometries: let the recursion deal with them */
		idx = 0;
		while ( idx < coll->ngeoms )
		{
			lwgeom_set_geodetic(coll->geoms[idx], value);
			idx++;
		}
	}
	else
	{
		lwerror("lwgeom_set_geodetic: unsupported geom type: %s", lwtype_name(geom->type));
	}
}
コード例 #2
0
Datum geography_gist_selectivity(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	/* int varRelid = PG_GETARG_INT32(3); */
	Oid relid;
	HeapTuple stats_tuple;
	GEOG_STATS *geogstats;
	/*
	 * This is to avoid casting the corresponding
	 * "type-punned" pointer, which would break
	 * "strict-aliasing rules".
	 */
	GEOG_STATS **gsptr=&geogstats;
	int geogstats_nvalues = 0;
	Node *other;
	Var *self;
	GBOX search_box;
	float8 selectivity = 0;

	POSTGIS_DEBUG(2, "geography_gist_selectivity called");

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
	{
		POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	 * This selectivity function is invoked by a clause of the form <arg> && <arg>
	 *
	 * In typical usage, one argument will be a column reference, while the other will
	 * be a geography constant; set self to point to the column argument and other
	 * to point to the constant argument.
	 */
	other = (Node *) linitial(args);
	if ( ! IsA(other, Const) )
	{
		self = (Var *)other;
		other = (Node *) lsecond(args);
	}
	else
	{
		self = (Var *) lsecond(args);
	}

	if ( ! IsA(other, Const) )
	{
		POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	* We don't have a nice <const> && <var> or <var> && <const> 
	* situation here. <const> && <const> would probably get evaluated
	* away by PgSQL earlier on. <func> && <const> is harder, and the
	* case we get often is <const> && ST_Expand(<var>), which does 
	* actually have a subtly different selectivity than a bae
	* <const> && <var> call. It's calculatable though, by expanding
	* every cell in the histgram appropriately.
	* 
	* Discussion: http://trac.osgeo.org/postgis/ticket/1828
	*
	* To do? Do variable selectivity based on the <func> node.
	*/
	if ( ! IsA(self, Var) )
	{
		POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity");
//		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
		PG_RETURN_FLOAT8(0.33333);
	}

	/* Convert coordinates to 3D geodesic */
	search_box.flags = 1;
	FLAGS_SET_GEODETIC(search_box.flags, 1);
	if ( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
	{
		POSTGIS_DEBUG(3, " search box cannot be calculated");
		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g",
	               search_box.xmin, search_box.ymin, search_box.zmin,
	               search_box.xmax, search_box.ymax, search_box.zmax);

	/*
	 * Get pg_statistic row
	 */
	relid = getrelid(self->varno, root->parse->rtable);

	stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0);
	if ( ! stats_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default estimate");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}


	if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gsptr, &geogstats_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity");

		ReleaseSysCache(stats_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues);

	POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin);
	POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax);
	POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx);
	POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy);
	POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells);

	/*
	 * Do the estimation
	 */
	selectivity = estimate_selectivity(&search_box, geogstats);

	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);

	free_attstatsslot(0, NULL, 0, (float *)geogstats, geogstats_nvalues);
	ReleaseSysCache(stats_tuple);
	PG_RETURN_FLOAT8(selectivity);
}
コード例 #3
0
/*
 * Turn an extent/edge-length ratio into the number of histogram cells along
 * one axis, clamped to the range [1, maxunits]. The lower bound wins when
 * maxunits is below 1.
 */
static int
geog_stats_axis_units(double extent, double edgelength, int maxunits)
{
	double units = extent / edgelength;

	if ( units > maxunits )
		units = maxunits;

	/* Negated comparison so that a NaN ratio also ends up as one cell */
	if ( ! (units >= 1.0) )
		return 1;

	return (int) units;
}

/*
 * Map a coordinate to a zero-based cell index along one histogram axis,
 * clamped to [0, units - 1]. An axis with zero extent has a single cell.
 */
static int
geog_stats_cell_index(double value, double min, double size, int units)
{
	double idx;

	/* Avoid dividing by a zero extent (flat axis) */
	if ( size == 0 )
		return 0;

	idx = (value - min) / size * units;

	if ( idx >= units )
		return units - 1;

	/* Negated comparison so that NaN is clamped to the first cell too */
	if ( ! (idx >= 0) )
		return 0;

	return (int) idx;
}

/**
 * Build the geography histogram for one analyzed column.
 *
 * The sampled rows are scanned to collect their bounding boxes and the
 * overall sample extent; that extent (optionally trimmed with a standard
 * deviation filter when USE_STANDARD_DEVIATION is set) is divided into a
 * grid of at most histocells cells, and each cell counts the sampled boxes
 * that touch it, normalised by the number of examined samples.
 *
 * On success the result is stored in statistics slot 0 of `stats`
 * (kind STATISTIC_KIND_GEOGRAPHY) together with the null fraction, the
 * average width and stadistinct = -1, and stats_valid is set to true.
 * When no usable row is found, stats_valid is set to false instead.
 *
 * @param stats      per-column analyze state; results are written here
 * @param fetchfunc  callback returning the datum of sample row i
 * @param samplerows number of rows in the sample
 * @param totalrows  row count copied into the histogram header
 */
static void
compute_geography_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
                        int samplerows, double totalrows)
{
	MemoryContext old_context;
	GEOG_STATS *geogstats;

	GBOX gbox;

	GBOX *sample_extent = NULL;
	GBOX **sampleboxes;
	GBOX histobox;
	int histocells;
	double sizex, sizey, sizez, edgelength = 0;
	int unitsx = 0, unitsy = 0, unitsz = 0;
	int geog_stats_size;
	struct dimensions histodims[3];
	int ndims;

	double total_width = 0;
	/* Coverage sums fractional areas/volumes, so it must not be an int */
	double total_cells_coverage = 0;
	int notnull_cnt = 0, examinedsamples = 0, total_count_cells = 0;

#if USE_STANDARD_DEVIATION
	/* for standard deviation */
	double avgLOWx, avgLOWy, avgLOWz, avgHIGx, avgHIGy, avgHIGz;
	double sumLOWx = 0, sumLOWy = 0, sumLOWz = 0, sumHIGx = 0, sumHIGy = 0, sumHIGz = 0;
	double sdLOWx = 0, sdLOWy = 0, sdLOWz = 0, sdHIGx = 0, sdHIGy = 0, sdHIGz = 0;
	GBOX *newhistobox = NULL;
#endif

	bool isnull;
	int i;

	POSTGIS_DEBUG(2, "compute_geography_stats called");

	/*
	 * We'll build an histogram having from 40 to 400 boxesPerSide
	 * Total number of cells is determined by attribute stat
	 * target. It can go from  1600 to 160000 (stat target: 10,1000)
	 */
	histocells = 160 * stats->attr->attstattarget;

	/* A non-positive target would size the histogram at zero or fewer cells */
	if ( histocells < 1 )
		histocells = 1;

	/*
	 * Memory to store the bounding boxes from all of the sampled rows
	 */
	sampleboxes = palloc(sizeof(GBOX *) * samplerows);

	/*
	 * Mark the GBOX as being geodetic. The flags are cleared first so the
	 * flag macro does not operate on an uninitialised value.
	 */
	gbox.flags = 0;
	FLAGS_SET_GEODETIC(gbox.flags, 1);

	/*
	 * First scan:
	 *  o find extent of the sample rows
	 *  o count null-infinite/not-null values
	 *  o compute total_width
	 *  o compute total features's box area (for avgFeatureArea)
	 *  o sum features box coordinates (for standard deviation)
	 */
	for (i = 0; i < samplerows; i++)
	{
		Datum datum;
		GSERIALIZED *serialized;

		/* Fetch the datum and cast it into a geography */
		datum = fetchfunc(stats, i, &isnull);

		/* Skip nulls */
		if (isnull)
			continue;

		serialized = (GSERIALIZED *)PG_DETOAST_DATUM(datum);

		/* Convert coordinates to 3D geodesic */
		if ( ! gserialized_get_gbox_p(serialized, &gbox) )
		{
			/* Unable to obtain or calculate a bounding box */
			POSTGIS_DEBUGF(3, "skipping geometry at position %d", i);

			continue;
		}

		/*
		 * Skip infinite geoms
		 */
		if ( ! finite(gbox.xmin) || ! finite(gbox.xmax) ||
		        ! finite(gbox.ymin) || ! finite(gbox.ymax) ||
		        ! finite(gbox.zmin) || ! finite(gbox.zmax) )
		{
			POSTGIS_DEBUGF(3, " skipped infinite geometry at position %d", i);

			continue;
		}

		/*
		 * Store bounding box in array
		 */
		sampleboxes[notnull_cnt] = palloc(sizeof(GBOX));
		memcpy(sampleboxes[notnull_cnt], &gbox, sizeof(GBOX));

		/*
		 * Add to sample extent union
		 */
		if ( ! sample_extent )
		{
			sample_extent = palloc(sizeof(GBOX));
			memcpy(sample_extent, &gbox, sizeof(GBOX));
		}
		else
		{
			sample_extent->xmax = Max(sample_extent->xmax, gbox.xmax);
			sample_extent->ymax = Max(sample_extent->ymax, gbox.ymax);
			sample_extent->zmax = Max(sample_extent->zmax, gbox.zmax);
			sample_extent->xmin = Min(sample_extent->xmin, gbox.xmin);
			sample_extent->ymin = Min(sample_extent->ymin, gbox.ymin);
			sample_extent->zmin = Min(sample_extent->zmin, gbox.zmin);
		}

		/** TODO: ask if we need geom or bvol size for stawidth */
		total_width += VARSIZE(serialized);

#if USE_STANDARD_DEVIATION
		/*
		 * Add bvol coordinates to sum for standard deviation
		 * computation.
		 */
		sumLOWx += gbox.xmin;
		sumLOWy += gbox.ymin;
		sumLOWz += gbox.zmin;
		sumHIGx += gbox.xmax;
		sumHIGy += gbox.ymax;
		sumHIGz += gbox.zmax;
#endif

		notnull_cnt++;

		/* give backend a chance of interrupting us */
		vacuum_delay_point();
	}

	POSTGIS_DEBUG(3, "End of 1st scan:");
	POSTGIS_DEBUGF(3, " No. of geometries sampled: %d", samplerows);
	POSTGIS_DEBUGF(3, " No. of non-null geometries sampled: %d", notnull_cnt);

	if ( ! notnull_cnt )
	{
		elog(NOTICE, " no notnull values, invalid stats");
#if defined (HQ_VERSION_NUM) && HQ_VERSION_NUM == 20000
		stats->pgstat.stats_valid = false;
#else
		stats->stats_valid = false;
#endif
		return;
	}

	/*
	 * sample_extent is only allocated once a usable row has been seen, so
	 * it is reported after the check above rather than before it.
	 */
	POSTGIS_DEBUGF(3, " Sample extent (min, max): (%g %g %g), (%g %g %g)", sample_extent->xmin, sample_extent->ymin,
	               sample_extent->zmin, sample_extent->xmax, sample_extent->ymax, sample_extent->zmax);

#if USE_STANDARD_DEVIATION

	POSTGIS_DEBUG(3, "Standard deviation filter enabled");

	/*
	 * Second scan:
	 *  o compute standard deviation
	 */
	avgLOWx = sumLOWx / notnull_cnt;
	avgLOWy = sumLOWy / notnull_cnt;
	avgLOWz = sumLOWz / notnull_cnt;
	avgHIGx = sumHIGx / notnull_cnt;
	avgHIGy = sumHIGy / notnull_cnt;
	avgHIGz = sumHIGz / notnull_cnt;

	for (i = 0; i < notnull_cnt; i++)
	{
		GBOX *box;
		box = (GBOX *)sampleboxes[i];

		sdLOWx += (box->xmin - avgLOWx) * (box->xmin - avgLOWx);
		sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy);
		sdLOWz += (box->zmin - avgLOWz) * (box->zmin - avgLOWz);
		sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx);
		sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy);
		sdHIGz += (box->zmax - avgHIGz) * (box->zmax - avgHIGz);
	}
	sdLOWx = sqrt(sdLOWx / notnull_cnt);
	sdLOWy = sqrt(sdLOWy / notnull_cnt);
	sdLOWz = sqrt(sdLOWz / notnull_cnt);
	sdHIGx = sqrt(sdHIGx / notnull_cnt);
	sdHIGy = sqrt(sdHIGy / notnull_cnt);
	sdHIGz = sqrt(sdHIGz / notnull_cnt);

	POSTGIS_DEBUG(3, " standard deviations:");
	POSTGIS_DEBUGF(3, "  LOWx - avg:%f sd:%f", avgLOWx, sdLOWx);
	POSTGIS_DEBUGF(3, "  LOWy - avg:%f sd:%f", avgLOWy, sdLOWy);
	POSTGIS_DEBUGF(3, "  LOWz - avg:%f sd:%f", avgLOWz, sdLOWz);
	POSTGIS_DEBUGF(3, "  HIGx - avg:%f sd:%f", avgHIGx, sdHIGx);
	POSTGIS_DEBUGF(3, "  HIGy - avg:%f sd:%f", avgHIGy, sdHIGy);
	POSTGIS_DEBUGF(3, "  HIGz - avg:%f sd:%f", avgHIGz, sdHIGz);

	histobox.xmin = Max((avgLOWx - SDFACTOR * sdLOWx), sample_extent->xmin);
	histobox.ymin = Max((avgLOWy - SDFACTOR * sdLOWy), sample_extent->ymin);
	histobox.zmin = Max((avgLOWz - SDFACTOR * sdLOWz), sample_extent->zmin);
	histobox.xmax = Min((avgHIGx + SDFACTOR * sdHIGx), sample_extent->xmax);
	histobox.ymax = Min((avgHIGy + SDFACTOR * sdHIGy), sample_extent->ymax);
	histobox.zmax = Min((avgHIGz + SDFACTOR * sdHIGz), sample_extent->zmax);
	histobox.mmax = 0.0;
	histobox.mmin = 0.0;
	histobox.flags = 0;

	POSTGIS_DEBUGF(3, " sd_extent: xmin, ymin, zmin: %f, %f, %f",
	               histobox.xmin, histobox.ymin, histobox.zmin);
	POSTGIS_DEBUGF(3, " sd_extent: xmax, ymax, zmax: %f, %f, %f",
	               histobox.xmax, histobox.ymax, histobox.zmax);

	/*
	 * Third scan:
	 *   o skip hard deviants
	 *   o compute new histogram box
	 */
	for (i = 0; i < notnull_cnt; i++)
	{
		GBOX *box;
		box = (GBOX *)sampleboxes[i];

		if ( box->xmin > histobox.xmax || box->xmax < histobox.xmin ||
		        box->ymin > histobox.ymax || box->ymax < histobox.ymin ||
		        box->zmin > histobox.zmax || box->zmax < histobox.zmin)
		{
			POSTGIS_DEBUGF(4, " feat %d is an hard deviant, skipped", i);

			sampleboxes[i] = NULL;
			continue;
		}

		if ( ! newhistobox )
		{
			newhistobox = palloc(sizeof(GBOX));
			memcpy(newhistobox, box, sizeof(GBOX));
		}
		else
		{
			if ( box->xmin < newhistobox->xmin )
				newhistobox->xmin = box->xmin;
			if ( box->ymin < newhistobox->ymin )
				newhistobox->ymin = box->ymin;
			if ( box->zmin < newhistobox->zmin )
				newhistobox->zmin = box->zmin;
			if ( box->xmax > newhistobox->xmax )
				newhistobox->xmax = box->xmax;
			if ( box->ymax > newhistobox->ymax )
				newhistobox->ymax = box->ymax;
			if ( box->zmax > newhistobox->zmax )
				newhistobox->zmax = box->zmax;
		}
	}

	/* If everything was a deviant, the new histobox is the same as the old histobox */
	if ( ! newhistobox )
	{
		newhistobox = palloc(sizeof(GBOX));
		memcpy(newhistobox, &histobox, sizeof(GBOX));
	}

	/*
	 * Set histogram extent as the intersection between
	 * standard deviation based histogram extent
	 * and computed sample extent after removal of
	 * hard deviants (there might be no hard deviants).
	 */
	if ( histobox.xmin < newhistobox->xmin )
		histobox.xmin = newhistobox->xmin;
	if ( histobox.ymin < newhistobox->ymin )
		histobox.ymin = newhistobox->ymin;
	if ( histobox.zmin < newhistobox->zmin )
		histobox.zmin = newhistobox->zmin;
	if ( histobox.xmax > newhistobox->xmax )
		histobox.xmax = newhistobox->xmax;
	if ( histobox.ymax > newhistobox->ymax )
		histobox.ymax = newhistobox->ymax;
	if ( histobox.zmax > newhistobox->zmax )
		histobox.zmax = newhistobox->zmax;

#else /* ! USE_STANDARD_DEVIATION */

	/*
	* Set histogram extent box
	*/
	histobox.xmin = sample_extent->xmin;
	histobox.ymin = sample_extent->ymin;
	histobox.zmin = sample_extent->zmin;
	histobox.xmax = sample_extent->xmax;
	histobox.ymax = sample_extent->ymax;
	histobox.zmax = sample_extent->zmax;

#endif /* USE_STANDARD_DEVIATION */


	POSTGIS_DEBUGF(3, " histogram_extent: xmin, ymin, zmin: %f, %f, %f",
	               histobox.xmin, histobox.ymin, histobox.zmin);
	POSTGIS_DEBUGF(3, " histogram_extent: xmax, ymax, zmax: %f, %f, %f",
	               histobox.xmax, histobox.ymax, histobox.zmax);

	/* Calculate the size of each dimension */
	sizex = histobox.xmax - histobox.xmin;
	sizey = histobox.ymax - histobox.ymin;
	sizez = histobox.zmax - histobox.zmin;

	/* In order to calculate a suitable aspect ratio for the histogram, we need
	   to work out how many dimensions exist within our sample data (which we
	   assume is representative of the whole data) */
	ndims = 0;
	if (sizex != 0)
	{
		histodims[ndims].axis = 'X';
		histodims[ndims].min = histobox.xmin;
		histodims[ndims].max = histobox.xmax;
		ndims++;
	}

	if (sizey != 0)
	{
		histodims[ndims].axis = 'Y';
		histodims[ndims].min = histobox.ymin;
		histodims[ndims].max = histobox.ymax;

		ndims++;
	}

	if (sizez != 0)
	{
		histodims[ndims].axis = 'Z';
		histodims[ndims].min = histobox.zmin;
		histodims[ndims].max = histobox.zmax;

		ndims++;
	}

	/* Based upon the number of dimensions, we now work out the number of units in each dimension.
	   The number of units is defined as the number of cell blocks in each dimension which make
	   up the total number of histocells; i.e. unitsx * unitsy * unitsz = histocells */

	/* Note: geodetic data is currently indexed in 3 dimensions; however code for remaining dimensions
	   is also included to allow for indexing 3D cartesian data at a later date */

	POSTGIS_DEBUGF(3, "Number of dimensions in sample set: %d", ndims);

	switch (ndims)
	{
	case 0:
		/* An empty column, or multiple points in exactly the same
		   position in space */
		unitsx = 1;
		unitsy = 1;
		unitsz = 1;
		histocells = 1;
		break;

	case 1:
		/* Sample data all lies on a single line, so set the correct
		   units variables depending upon which axis is in use */
		for (i = 0; i < ndims; i++)
		{
			if ( (histodims[i].max - histodims[i].min) != 0)
			{
				/* We've found the non-zero dimension, so set the
				   units variables accordingly */
				switch (histodims[i].axis)
				{
				case 'X':
					unitsx = histocells;
					unitsy = 1;
					unitsz = 1;
					break;

				case 'Y':
					unitsx = 1;
					unitsy = histocells;
					unitsz = 1;
					break;

				case 'Z':
					unitsx = 1;
					unitsy = 1;
					unitsz = histocells;
					break;
				}
			}
		}
		break;

	case 2:
		/* Sample data lies within 2D space: divide the total area by the total
		   number of cells, and thus work out the edge size of the unit block */
		edgelength = sqrt(
		                 Abs(histodims[0].max - histodims[0].min) *
		                 Abs(histodims[1].max - histodims[1].min) / (double)histocells
		             );
		break;

	case 3:
		/* Sample data lies within 3D space: divide the total volume by the total
		   number of cells, and thus work out the edge size of the unit block */
		edgelength = pow(
		                 Abs(histodims[0].max - histodims[0].min) *
		                 Abs(histodims[1].max - histodims[1].min) *
		                 Abs(histodims[2].max - histodims[2].min) / (double)histocells,
		                 (double)1/3);
		break;
	}

	if ( ndims > 1 )
	{
		/*
		 * Hand each populated axis its share of cells. Every axis gets at
		 * least one cell (a very thin axis used to truncate to zero, which
		 * led to negative cell indexes further down) and the running
		 * budget keeps unitsx * unitsy * unitsz within histocells, the
		 * size the histogram array is allocated with.
		 */
		int remaining = histocells;

		unitsx = 1;
		unitsy = 1;
		unitsz = 1;

		for (i = 0; i < ndims; i++)
		{
			int units = geog_stats_axis_units(
			                Abs(histodims[i].max - histodims[i].min),
			                edgelength, remaining);

			/* Route the count by the axis label recorded for this slot */
			switch (histodims[i].axis)
			{
			case 'X':
				unitsx = units;
				break;

			case 'Y':
				unitsy = units;
				break;

			case 'Z':
				unitsz = units;
				break;
			}

			remaining /= units;
		}
	}

	POSTGIS_DEBUGF(3, " computed histogram grid size (X,Y,Z): %d x %d x %d (%d out of %d cells)", unitsx, unitsy, unitsz, unitsx * unitsy * unitsz, histocells);

	/*
	 * Create the histogram (GEOG_STATS)
	 */
	old_context = MemoryContextSwitchTo(stats->anl_context);
	geog_stats_size = sizeof(GEOG_STATS) + (histocells - 1) * sizeof(float4);
	geogstats = palloc(geog_stats_size);
	MemoryContextSwitchTo(old_context);

	geogstats->dims = ndims;
	geogstats->xmin = histobox.xmin;
	geogstats->ymin = histobox.ymin;
	geogstats->zmin = histobox.zmin;
	geogstats->xmax = histobox.xmax;
	geogstats->ymax = histobox.ymax;
	geogstats->zmax = histobox.zmax;
	geogstats->unitsx = unitsx;
	geogstats->unitsy = unitsy;
	geogstats->unitsz = unitsz;
	geogstats->totalrows = totalrows;

	/* Initialize all values to 0 */
	for (i = 0; i < histocells; i++)
		geogstats->value[i] = 0;


	/*
	 * Fourth scan:
	 *  o fill histogram values with the number of
	 *    features' bbox overlaps: a feature's bvol
	 *    can fully overlap (1) or partially overlap
	 *    (fraction of 1) an histogram cell.
	 *
	 *  o compute total cells occupation
	 *
	 */

	POSTGIS_DEBUG(3, "Beginning histogram intersection calculations");

	for (i = 0; i < notnull_cnt; i++)
	{
		GBOX *box;

		/* Note these array index variables are zero-based */
		int x_idx_min, x_idx_max, x;
		int y_idx_min, y_idx_max, y;
		int z_idx_min, z_idx_max, z;
		int numcells = 0;

		box = (GBOX *)sampleboxes[i];
		if ( ! box ) continue; /* hard deviant.. */

		/* give backend a chance of interrupting us */
		vacuum_delay_point();

		POSTGIS_DEBUGF(4, " feat %d box is %f %f %f, %f %f %f",
		               i, box->xmax, box->ymax, box->zmax, box->xmin, box->ymin, box->zmin);

		/* Find first overlapping cell on each axis */
		x_idx_min = geog_stats_cell_index(box->xmin, geogstats->xmin, sizex, unitsx);
		y_idx_min = geog_stats_cell_index(box->ymin, geogstats->ymin, sizey, unitsy);
		z_idx_min = geog_stats_cell_index(box->zmin, geogstats->zmin, sizez, unitsz);

		/* Find last overlapping cell on each axis */
		x_idx_max = geog_stats_cell_index(box->xmax, geogstats->xmin, sizex, unitsx);
		y_idx_max = geog_stats_cell_index(box->ymax, geogstats->ymin, sizey, unitsy);
		z_idx_max = geog_stats_cell_index(box->zmax, geogstats->zmin, sizez, unitsz);

		POSTGIS_DEBUGF(4, " feat %d overlaps unitsx %d-%d, unitsy %d-%d, unitsz %d-%d",
		               i, x_idx_min, x_idx_max, y_idx_min, y_idx_max, z_idx_min, z_idx_max);

		/* Calculate the feature coverage - this of course depends upon the number of dims */
		switch (ndims)
		{
		case 1:
			total_cells_coverage++;
			break;

		case 2:
			total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin);
			break;

		case 3:
			total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin) *
			                        (box->zmax - box->zmin);
			break;

		default:
			/* ndims == 0: nothing to add */
			break;
		}

		/*
		 * the {x,y,z}_idx_{min,max}
		 * define the grid squares that the box intersects
		 */

		for (z = z_idx_min; z <= z_idx_max; z++)
		{
			for (y = y_idx_min; y <= y_idx_max; y++)
			{
				for (x = x_idx_min; x <= x_idx_max; x++)
				{
					geogstats->value[x + y * unitsx + z * unitsx * unitsy] += 1;
					numcells++;
				}
			}
		}

		/*
		 * before adding to the total cells
		 * we could decide if we really
		 * want this feature to count
		 */
		total_count_cells += numcells;

		examinedsamples++;
	}

	POSTGIS_DEBUGF(3, " examined_samples: %d/%d", examinedsamples, samplerows);

	if ( ! examinedsamples )
	{
		elog(NOTICE, " no examined values, invalid stats");
#if defined (HQ_VERSION_NUM) && HQ_VERSION_NUM == 20000
        stats->pgstat.stats_valid = false;
#else
		stats->stats_valid = false;
#endif

		POSTGIS_DEBUG(3, " no stats have been gathered");

		return;
	}

	/** TODO: what about null features (TODO) */
	geogstats->avgFeatureCells = (float4)total_count_cells / examinedsamples;
	geogstats->avgFeatureCoverage = total_cells_coverage / examinedsamples;

	POSTGIS_DEBUGF(3, " histo: total_boxes_cells: %d", total_count_cells);
	POSTGIS_DEBUGF(3, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells);
	POSTGIS_DEBUGF(3, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage);

	/*
	 * Normalize histogram
	 *
	 * We divide each histogram cell value
	 * by the number of samples examined.
	 *
	 */
	for (i = 0; i < histocells; i++)
		geogstats->value[i] /= examinedsamples;

#if POSTGIS_DEBUG_LEVEL >= 4
	/* Dump the resulting histogram for analysis */
	{
		int x, y, z;
		for (x = 0; x < unitsx; x++)
		{
			for (y = 0; y < unitsy; y++)
			{
				for (z = 0; z < unitsz; z++)
				{
					POSTGIS_DEBUGF(4, " histo[%d,%d,%d] = %.15f", x, y, z,
					               geogstats->value[x + y * unitsx + z * unitsx * unitsy]);
				}
			}
		}
	}
#endif

	/*
	 * Write the statistics data
	 */
#if defined (HQ_VERSION_NUM) && HQ_VERSION_NUM == 20000
    stats->pgstat.stakind[0] = STATISTIC_KIND_GEOGRAPHY;
    stats->pgstat.staop[0] = InvalidOid;
    stats->pgstat.stanumbers[0] = (float4 *)geogstats;
    stats->pgstat.numnumbers[0] = geog_stats_size/sizeof(float4);

    stats->pgstat.stanullfrac = (float4)(samplerows - notnull_cnt)/samplerows;
    stats->pgstat.stawidth = total_width/notnull_cnt;
    stats->pgstat.stadistinct = -1.0;

    POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOGRAPHY)",
                   stats->pgstat.stakind[0]);
    POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->pgstat.staop[0]);
    POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->pgstat.numnumbers[0]);
    POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", (samplerows - notnull_cnt), samplerows, stats->pgstat.stanullfrac);
    POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->pgstat.stawidth);
    POSTGIS_DEBUG(3, " out: distinct values: all (no check done)");

    stats->pgstat.stats_valid = true;
#else
	stats->stakind[0] = STATISTIC_KIND_GEOGRAPHY;
	stats->staop[0] = InvalidOid;
	stats->stanumbers[0] = (float4 *)geogstats;
	stats->numnumbers[0] = geog_stats_size/sizeof(float4);

	stats->stanullfrac = (float4)(samplerows - notnull_cnt)/samplerows;
	stats->stawidth = total_width/notnull_cnt;
	stats->stadistinct = -1.0;

	POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOGRAPHY)",
	               stats->stakind[0]);
	POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
	POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
	POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", (samplerows - notnull_cnt), samplerows, stats->stanullfrac);
	POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
	POSTGIS_DEBUG(3, " out: distinct values: all (no check done)");

	stats->stats_valid = true;
#endif
}
コード例 #4
0
/*
 * Return a palloc'd copy of str that is safe to place inside an E'...' SQL
 * string literal: single quotes and backslashes are doubled. When
 * is_identifier is non-zero, double quotes are doubled as well so the text
 * can additionally be wrapped in "..." as a quoted identifier inside that
 * literal.
 */
static char *
estimated_extent_escape(const char *str, int is_identifier)
{
	/* Every input byte expands to at most two output bytes */
	char *escaped = palloc(2 * strlen(str) + 1);
	char *wr = escaped;
	const char *rd;

	for ( rd = str; *rd != '\0'; rd++ )
	{
		if ( *rd == '\'' || *rd == '\\' || ( is_identifier && *rd == '"' ) )
			*wr++ = *rd;
		*wr++ = *rd;
	}
	*wr = '\0';

	return escaped;
}

/**
 * Return the extent recorded in the stored statistics of a geometry column.
 *
 * Called with (schema, table, column) or (table, column); in the second
 * form the current schema is used. The function first checks through SQL
 * that the session user has SELECT privilege on the table, then reads
 * elements 5..8 of stanumbers1 and reltuples for the column and builds a
 * 2D GBOX from the four floats.
 *
 * Returns a pointer to a GBOX allocated with SPI_palloc, or NULL (with a
 * NOTICE) when reltuples is zero. Wrong argument counts, SPI failures,
 * missing privileges, unknown columns and missing or malformed statistics
 * are reported with elog(ERROR).
 */
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
{
	text *txnsp = NULL;
	text *txtbl = NULL;
	text *txcol = NULL;
	char *nsp = NULL;
	char *tbl = NULL;
	char *col = NULL;
	/* Escaped copies of the names, used only when building SQL text */
	char *nsp_ident = NULL;
	char *nsp_lit = NULL;
	char *tbl_ident = NULL;
	char *tbl_lit = NULL;
	char *col_lit = NULL;
	char *query;
	int written;
	ArrayType *array = NULL;
	int SPIcode;
	SPITupleTable *tuptable;
	TupleDesc tupdesc ;
	HeapTuple tuple ;
	bool isnull;
	GBOX *box;
	size_t querysize;
	GEOM_STATS geomstats;
	float reltuples;
	Datum binval;

	if ( PG_NARGS() == 3 )
	{
		txnsp = PG_GETARG_TEXT_P(0);
		txtbl = PG_GETARG_TEXT_P(1);
		txcol = PG_GETARG_TEXT_P(2);
	}
	else if ( PG_NARGS() == 2 )
	{
		txtbl = PG_GETARG_TEXT_P(0);
		txcol = PG_GETARG_TEXT_P(1);
	}
	else
	{
		elog(ERROR, "estimated_extent() called with wrong number of arguments");
		PG_RETURN_NULL();
	}

	POSTGIS_DEBUG(2, "geomtery_estimated_extent called");

	/* Connect to SPI manager */
	SPIcode = SPI_connect();
	if (SPIcode != SPI_OK_CONNECT)
	{
		elog(ERROR, "geometry_estimated_extent: couldnt open a connection to SPI");
		PG_RETURN_NULL() ;
	}

	if ( txnsp )
	{
		nsp = text2cstring(txnsp);
	}

	tbl = text2cstring(txtbl);
	col = text2cstring(txcol);

#if POSTGIS_DEBUG_LEVEL > 0
	if ( txnsp )
	{
		POSTGIS_DEBUGF(3, " schema:%s table:%s column:%s", nsp, tbl, col);
	}
	else
	{
		POSTGIS_DEBUGF(3, " schema:current_schema() table:%s column:%s",
		               tbl, col);
	}
#endif

	/*
	 * The names come straight from the caller and end up inside SQL text,
	 * so escape them first. The raw strings are kept for messages only.
	 */
	tbl_ident = estimated_extent_escape(tbl, 1);
	tbl_lit = estimated_extent_escape(tbl, 0);
	col_lit = estimated_extent_escape(col, 0);

	/* 516 bytes of headroom for the fixed SQL text of the longer query */
	querysize = 516 + strlen(tbl_ident) + strlen(tbl_lit) + strlen(col_lit);

	if ( txnsp )
	{
		nsp_ident = estimated_extent_escape(nsp, 1);
		nsp_lit = estimated_extent_escape(nsp, 0);
		querysize += strlen(nsp_ident) + strlen(nsp_lit);
	}
	else
	{
		querysize += 32; /* current_schema() */
	}

	query = palloc(querysize);


	/* Security check: because we access information in the pg_statistic table, we must run as the database
	superuser (by marking the function as SECURITY DEFINER) and check permissions ourselves */
	if ( txnsp )
	{
		written = snprintf(query, querysize, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), E'\"%s\".\"%s\"', 'select')", nsp_ident, tbl_ident);
	}
	else
	{
		written = snprintf(query, querysize, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), E'\"%s\"', 'select')", tbl_ident);
	}

	/* Never run a truncated statement */
	if ( written < 0 || (size_t) written >= querysize )
	{
		elog(ERROR, "geometry_estimated_extent: couldn't build permission check sql");
		SPI_finish();
		PG_RETURN_NULL();
	}

	POSTGIS_DEBUGF(4, "permission check sql query is: %s", query);

	SPIcode = SPI_exec(query, 1);
	if (SPIcode != SPI_OK_SELECT)
	{
		elog(ERROR, "geometry_estimated_extent: couldn't execute permission check sql via SPI");
		SPI_finish();
		PG_RETURN_NULL();
	}

	tuptable = SPI_tuptable;
	tupdesc = SPI_tuptable->tupdesc;
	tuple = tuptable->vals[0];

	if (!DatumGetBool(SPI_getbinval(tuple, tupdesc, 1, &isnull)))
	{
		elog(ERROR, "geometry_estimated_extent: permission denied for relation %s", tbl);
		SPI_finish();
		PG_RETURN_NULL();
	}


	/* Return the stats data */
	if ( txnsp )
	{
	  written = snprintf(query, querysize,
	    "SELECT s.stanumbers1[5:8], c.reltuples FROM pg_class c"
	    " LEFT OUTER JOIN pg_namespace n ON (n.oid = c.relnamespace)"
	    " LEFT OUTER JOIN pg_attribute a ON (a.attrelid = c.oid )"
	    " LEFT OUTER JOIN pg_statistic s ON (s.starelid = c.oid AND "
	                                        "s.staattnum = a.attnum )"
	    " WHERE c.relname = E'%s' AND a.attname = E'%s' "
	    " AND n.nspname = E'%s';",
	    tbl_lit, col_lit, nsp_lit);
	}
	else
	{
	  written = snprintf(query, querysize,
	    "SELECT s.stanumbers1[5:8], c.reltuples FROM pg_class c"
	    " LEFT OUTER JOIN pg_namespace n ON (n.oid = c.relnamespace)"
	    " LEFT OUTER JOIN pg_attribute a ON (a.attrelid = c.oid )"
	    " LEFT OUTER JOIN pg_statistic s ON (s.starelid = c.oid AND "
	                                        "s.staattnum = a.attnum )"
	    " WHERE c.relname = E'%s' AND a.attname = E'%s' "
	    " AND n.nspname = current_schema();",
	    tbl_lit, col_lit);
	}

	/* Never run a truncated statement */
	if ( written < 0 || (size_t) written >= querysize )
	{
		elog(ERROR, "geometry_estimated_extent: couldn't build statistics sql");
		SPI_finish();
		PG_RETURN_NULL();
	}

	POSTGIS_DEBUGF(4, " query: %s", query);

	SPIcode = SPI_exec(query, 1);
	if (SPIcode != SPI_OK_SELECT )
	{
		elog(ERROR,"geometry_estimated_extent: couldnt execute sql via SPI");
		SPI_finish();
		PG_RETURN_NULL();
	}
	if (SPI_processed != 1)
	{

		POSTGIS_DEBUGF(3, " %d stat rows", SPI_processed);

		elog(ERROR, "Unexistent field \"%s\".\"%s\".\"%s\"",
			( nsp ? nsp : "<current>" ), tbl, col);

		SPI_finish();
		PG_RETURN_NULL() ;
	}

	tuptable = SPI_tuptable;
	tupdesc = SPI_tuptable->tupdesc;
	tuple = tuptable->vals[0];

	/* Check if the table has zero rows first */
	binval = SPI_getbinval(tuple, tupdesc, 2, &isnull);
	if (isnull)
	{

		POSTGIS_DEBUG(3, " reltuples is NULL");

		elog(ERROR, "geometry_estimated_extent: null reltuples for table");

		SPI_finish();
		PG_RETURN_NULL();
	}
	reltuples = DatumGetFloat4(binval);
	if ( ! reltuples )
	{
		POSTGIS_DEBUG(3, "table has estimated zero rows");

		/* 
		 * TODO: distinguish between empty and not analyzed ?
		 */
		elog(NOTICE, "\"%s\".\"%s\".\"%s\" is empty or not analyzed",
			( nsp ? nsp : "<current>" ), tbl, col);

		SPI_finish();
		PG_RETURN_NULL();
	}

	binval = SPI_getbinval(tuple, tupdesc, 1, &isnull);
	if (isnull)
	{

		POSTGIS_DEBUG(3, " stats are NULL");

		elog(ERROR, "geometry_estimated_extent: null statistics for table");

		SPI_finish();
		PG_RETURN_NULL();
	}
	array = DatumGetArrayTypeP(binval);
	if ( ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)) != 4 )
	{
		elog(ERROR, " corrupted histogram");
		/* Same cleanup as the other error paths */
		SPI_finish();
		PG_RETURN_NULL();
	}

	POSTGIS_DEBUGF(3, " stats array has %d elems", ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)));

	/*
	 * Construct GBOX.
	 * Must allocate this in upper executor context
	 * to keep it alive after SPI_finish().
	 */
	box = SPI_palloc(sizeof(GBOX));
	/*
	 * Zero the whole box before using the flag macros so that the flags
	 * and the ordinates not filled in below have defined values.
	 */
	memset(box, 0, sizeof(GBOX));
	FLAGS_SET_GEODETIC(box->flags, 0);
	FLAGS_SET_Z(box->flags, 0);
	FLAGS_SET_M(box->flags, 0);

	/*
	 * Construct the box. The four floats are copied over geomstats
	 * starting at xmin, then read back through xmin, ymin, xmax and ymax.
	 */
	memcpy(&(geomstats.xmin), ARR_DATA_PTR(array), sizeof(float)*4);
	box->xmin = geomstats.xmin;
	box->xmax = geomstats.xmax;
	box->ymin = geomstats.ymin;
	box->ymax = geomstats.ymax;

	POSTGIS_DEBUGF(3, " histogram extent = %g %g, %g %g", box->xmin,
	               box->ymin, box->xmax, box->ymax);

	SPIcode = SPI_finish();
	if (SPIcode != SPI_OK_FINISH )
	{
		elog(ERROR, "geometry_estimated_extent: couldn't disconnect from SPI");
	}

	/* TODO: enlarge the box by some factor */

	PG_RETURN_POINTER(box);
}