Datum geography_gist_selectivity(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); /* Oid operator = PG_GETARG_OID(1); */ List *args = (List *) PG_GETARG_POINTER(2); /* int varRelid = PG_GETARG_INT32(3); */ Oid relid; HeapTuple stats_tuple; GEOG_STATS *geogstats; /* * This is to avoid casting the corresponding * "type-punned" pointer, which would break * "strict-aliasing rules". */ GEOG_STATS **gsptr=&geogstats; int geogstats_nvalues = 0; Node *other; Var *self; GBOX search_box; float8 selectivity = 0; POSTGIS_DEBUG(2, "geography_gist_selectivity called"); /* Fail if not a binary opclause (probably shouldn't happen) */ if (list_length(args) != 2) { POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* * This selectivity function is invoked by a clause of the form <arg> && <arg> * * In typical usage, one argument will be a column reference, while the other will * be a geography constant; set self to point to the column argument and other * to point to the constant argument. */ other = (Node *) linitial(args); if ( ! IsA(other, Const) ) { self = (Var *)other; other = (Node *) lsecond(args); } else { self = (Var *) lsecond(args); } if ( ! IsA(other, Const) ) { POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* * We don't have a nice <const> && <var> or <var> && <const> * situation here. <const> && <const> would probably get evaluated * away by PgSQL earlier on. <func> && <const> is harder, and the * case we get often is <const> && ST_Expand(<var>), which does * actually have a subtly different selectivity than a bae * <const> && <var> call. It's calculatable though, by expanding * every cell in the histgram appropriately. * * Discussion: http://trac.osgeo.org/postgis/ticket/1828 * * To do? Do variable selectivity based on the <func> node. */ if ( ! IsA(self, Var) ) { POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity"); // PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); PG_RETURN_FLOAT8(0.33333); } /* Convert coordinates to 3D geodesic */ search_box.flags = 1; FLAGS_SET_GEODETIC(search_box.flags, 1); if ( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) ) { POSTGIS_DEBUG(3, " search box cannot be calculated"); PG_RETURN_FLOAT8(0.0); } POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax); /* * Get pg_statistic row */ relid = getrelid(self->varno, root->parse->rtable); stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0); if ( ! stats_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default estimate"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gsptr, &geogstats_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity"); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues); POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin); POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax); POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx); POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy); POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz); POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage); POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells); /* * Do the estimation */ selectivity = estimate_selectivity(&search_box, geogstats); POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity); free_attstatsslot(0, NULL, 0, (float *)geogstats, geogstats_nvalues); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(selectivity); }
Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); /* Oid operator = PG_GETARG_OID(1); */ List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); Node *arg1, *arg2; Var *var1, *var2; Oid relid1, relid2; HeapTuple stats1_tuple, stats2_tuple; GEOG_STATS *geogstats1, *geogstats2; /* * These are to avoid casting the corresponding * "type-punned" pointers, which would break * "strict-aliasing rules". */ GEOG_STATS **gs1ptr=&geogstats1, **gs2ptr=&geogstats2; int geogstats1_nvalues = 0, geogstats2_nvalues = 0; float8 selectivity1 = 0.0, selectivity2 = 0.0; float4 num1_tuples = 0.0, num2_tuples = 0.0; float4 total_tuples = 0.0, rows_returned = 0.0; GBOX search_box; /** * Join selectivity algorithm. To calculation the selectivity we * calculate the intersection of the two column sample extents, * sum the results, and then multiply by two since for each * geometry in col 1 that intersects a geometry in col 2, the same * will also be true. */ POSTGIS_DEBUGF(3, "geography_gist_join_selectivity called with jointype %d", jointype); /* * We'll only respond to an inner join/unknown context join */ if (jointype != JOIN_INNER) { elog(NOTICE, "geography_gist_join_selectivity called with incorrect join type"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* * Determine the oids of the geometry columns we are working with */ arg1 = (Node *) linitial(args); arg2 = (Node *) lsecond(args); if (!IsA(arg1, Var) || !IsA(arg2, Var)) { elog(DEBUG1, "geography_gist_join_selectivity called with arguments that are not column references"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } var1 = (Var *)arg1; var2 = (Var *)arg2; relid1 = getrelid(var1->varno, root->parse->rtable); relid2 = getrelid(var2->varno, root->parse->rtable); POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2); /* Read the stats tuple from the first column */ stats1_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno), 0, 0); if ( ! stats1_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gs1ptr, &geogstats1_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity"); ReleaseSysCache(stats1_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* Read the stats tuple from the second column */ stats2_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno), 0, 0); if ( ! stats2_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); ReleaseSysCache(stats1_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gs2ptr, &geogstats2_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity"); free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); ReleaseSysCache(stats2_tuple); ReleaseSysCache(stats1_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /** * Setup the search box - this is the intersection of the two column * extents. */ search_box.xmin = Max(geogstats1->xmin, geogstats2->xmin); search_box.ymin = Max(geogstats1->ymin, geogstats2->ymin); search_box.zmin = Max(geogstats1->zmin, geogstats2->zmin); search_box.xmax = Min(geogstats1->xmax, geogstats2->xmax); search_box.ymax = Min(geogstats1->ymax, geogstats2->ymax); search_box.zmax = Min(geogstats1->zmax, geogstats2->zmax); /* If the extents of the two columns don't intersect, return zero */ if (search_box.xmin > search_box.xmax || search_box.ymin > search_box.ymax || search_box.zmin > search_box.zmax) PG_RETURN_FLOAT8(0.0); POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats1->xmin, geogstats1->ymin, geogstats1->zmin, geogstats1->xmax, geogstats1->ymax, geogstats1->zmax); POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats2->xmin, geogstats2->ymin, geogstats2->zmin, geogstats2->xmax, geogstats2->ymax, geogstats2->zmax); POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax); /* Do the selectivity */ selectivity1 = estimate_selectivity(&search_box, geogstats1); selectivity2 = estimate_selectivity(&search_box, geogstats2); POSTGIS_DEBUGF(3, "selectivity1: %.15g selectivity2: %.15g", selectivity1, selectivity2); /* * OK, so before we calculate the join selectivity we also need to * know the number of tuples in each of the columns since * estimate_selectivity returns the number of estimated tuples * divided by the total number of tuples. */ num1_tuples = geogstats1->totalrows; num2_tuples = geogstats2->totalrows; /* Free the statistic tuples */ free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); ReleaseSysCache(stats1_tuple); free_attstatsslot(0, NULL, 0, (float *)geogstats2, geogstats2_nvalues); ReleaseSysCache(stats2_tuple); /* * Finally calculate the estimate of the number of rows returned * * = 2 * (nrows from col1 + nrows from col2) / * total nrows in col1 x total nrows in col2 * * The factor of 2 accounts for the fact that for each tuple in * col 1 matching col 2, * there will be another match in col 2 matching col 1 */ total_tuples = num1_tuples * num2_tuples; rows_returned = 2 * ((num1_tuples * selectivity1) + (num2_tuples * selectivity2)); POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1); POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2); POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned); /* * One (or both) tuple count is zero... * We return default selectivity estimate. * We could probably attempt at an estimate * w/out looking at tables tuple count, with * a function of selectivity1, selectivity2. */ if ( ! total_tuples ) { POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( rows_returned > total_tuples ) PG_RETURN_FLOAT8(1.0); PG_RETURN_FLOAT8(rows_returned / total_tuples); }
Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); /* Oid operator = PG_GETARG_OID(1); */ List *args = (List *) PG_GETARG_POINTER(2); /* int varRelid = PG_GETARG_INT32(3); */ Oid relid; HeapTuple stats_tuple; GEOM_STATS *geomstats; /* * This is to avoid casting the corresponding * "type-punned" pointer, which would break * "strict-aliasing rules". */ GEOM_STATS **gsptr=&geomstats; int geomstats_nvalues=0; Node *other; Var *self; GBOX search_box; float8 selectivity=0; POSTGIS_DEBUG(2, "geometry_gist_sel called"); /* Fail if not a binary opclause (probably shouldn't happen) */ if (list_length(args) != 2) { POSTGIS_DEBUG(3, "geometry_gist_sel: not a binary opclause"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } /* * Find the constant part */ other = (Node *) linitial(args); if ( ! IsA(other, Const) ) { self = (Var *)other; other = (Node *) lsecond(args); } else { self = (Var *) lsecond(args); } if ( ! IsA(other, Const) ) { POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } /* * We are working on two constants.. * TODO: check if expression is true, * returned set would be either * the whole or none. */ if ( ! IsA(self, Var) ) { POSTGIS_DEBUG(3, " no variable argument ? - returning default selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } /* * Convert the constant to a BOX */ if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) ) { POSTGIS_DEBUG(3, "search box is EMPTY"); PG_RETURN_FLOAT8(0.0); } POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax); /* * Get pg_statistic row */ relid = getrelid(self->varno, root->parse->rtable); stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0); if ( ! stats_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default estimate"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gsptr, &geomstats_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity"); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues); POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f", geomstats->xmin, geomstats->ymin); POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f", geomstats->xmax, geomstats->ymax); POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->rows); POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->cols); POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea); POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells); /* * Do the estimation */ selectivity = estimate_selectivity(&search_box, geomstats); POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity); free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(selectivity); }