Ejemplo n.º 1
0
/*
 * pg_stat_get_checkpoint_sync_time
 *
 * SQL-callable accessor: returns, as a float8, the checkpoint_sync_time
 * field of the structure returned by pgstat_fetch_global().
 */
Datum
pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
{
	double		sync_time_ms;

	/* The stored value is already in msec; only widen it to double. */
	sync_time_ms = (double) pgstat_fetch_global()->checkpoint_sync_time;

	PG_RETURN_FLOAT8(sync_time_ms);
}
Ejemplo n.º 2
0
/**
* Join selectivity estimator for geography columns.
*
* Arguments read from fcinfo: 0 = PlannerInfo *root, 2 = List *args,
* 3 = JoinType. Argument 1 (the operator Oid) is not used.
*
* Returns a float8 selectivity:
*  - DEFAULT_GEOGRAPHY_SEL when the join type is not JOIN_INNER, when either
*    argument is not a Var, when statistics are unavailable for either
*    column, or when the product of the two row counts is zero;
*  - 0.0 when the two column extents do not intersect;
*  - otherwise rows_returned / total_tuples, capped at 1.0.
*
* Every exit path taken once statistics have been fetched frees the
* statistics slots and releases the syscache tuples that were acquired.
*/
Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	JoinType jointype = (JoinType) PG_GETARG_INT16(3);

	Node *arg1, *arg2;
	Var *var1, *var2;
	Oid relid1, relid2;

	HeapTuple stats1_tuple, stats2_tuple;
	GEOG_STATS *geogstats1, *geogstats2;
	/*
	* These are to avoid casting the corresponding
	* "type-punned" pointers, which would break
	* "strict-aliasing rules".
	*/
	GEOG_STATS **gs1ptr=&geogstats1, **gs2ptr=&geogstats2;
	int geogstats1_nvalues = 0, geogstats2_nvalues = 0;
	float8 selectivity1 = 0.0, selectivity2 = 0.0;
	float4 num1_tuples = 0.0, num2_tuples = 0.0;
	float4 total_tuples = 0.0, rows_returned = 0.0;
	GBOX search_box;


	/**
	* Join selectivity algorithm. To calculate the selectivity we
	* calculate the intersection of the two column sample extents,
	* sum the results, and then multiply by two since for each
	* geometry in col 1 that intersects a geometry in col 2, the same
	* will also be true.
	*/

	POSTGIS_DEBUGF(3, "geography_gist_join_selectivity called with jointype %d", jointype);

	/*
	* We'll only respond to an inner join/unknown context join
	*/
	if (jointype != JOIN_INNER)
	{
		elog(NOTICE, "geography_gist_join_selectivity called with incorrect join type");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	* Determine the oids of the geometry columns we are working with
	*/
	arg1 = (Node *) linitial(args);
	arg2 = (Node *) lsecond(args);

	if (!IsA(arg1, Var) || !IsA(arg2, Var))
	{
		elog(DEBUG1, "geography_gist_join_selectivity called with arguments that are not column references");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	var1 = (Var *)arg1;
	var2 = (Var *)arg2;

	relid1 = getrelid(var1->varno, root->parse->rtable);
	relid2 = getrelid(var2->varno, root->parse->rtable);

	POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2);

	/* Read the stats tuple from the first column */
	stats1_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno), 0, 0);
	if ( ! stats1_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gs1ptr, &geogstats1_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");

		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}


	/* Read the stats tuple from the second column */
	stats2_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno), 0, 0);
	if ( ! stats2_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");

		free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gs2ptr, &geogstats2_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");

		free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
		ReleaseSysCache(stats2_tuple);
		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}


	/**
	* Setup the search box - this is the intersection of the two column
	* extents.
	*/
	search_box.xmin = Max(geogstats1->xmin, geogstats2->xmin);
	search_box.ymin = Max(geogstats1->ymin, geogstats2->ymin);
	search_box.zmin = Max(geogstats1->zmin, geogstats2->zmin);
	search_box.xmax = Min(geogstats1->xmax, geogstats2->xmax);
	search_box.ymax = Min(geogstats1->ymax, geogstats2->ymax);
	search_box.zmax = Min(geogstats1->zmax, geogstats2->zmax);

	/* If the extents of the two columns don't intersect, return zero */
	if (search_box.xmin > search_box.xmax || search_box.ymin > search_box.ymax ||
	        search_box.zmin > search_box.zmax)
	{
		/*
		* Both statistics slots and both syscache tuples are held at this
		* point; give them back before leaving, as the other exits do.
		*/
		free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
		ReleaseSysCache(stats1_tuple);

		free_attstatsslot(0, NULL, 0, (float *)geogstats2, geogstats2_nvalues);
		ReleaseSysCache(stats2_tuple);

		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats1->xmin, geogstats1->ymin, geogstats1->zmin, geogstats1->xmax, geogstats1->ymax, geogstats1->zmax);
	POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats2->xmin, geogstats2->ymin, geogstats2->zmin, geogstats2->xmax, geogstats2->ymax, geogstats2->zmax);
	POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax);


	/* Do the selectivity */
	selectivity1 = estimate_selectivity(&search_box, geogstats1);
	selectivity2 = estimate_selectivity(&search_box, geogstats2);

	POSTGIS_DEBUGF(3, "selectivity1: %.15g   selectivity2: %.15g", selectivity1, selectivity2);

	/*
	* OK, so before we calculate the join selectivity we also need to
	* know the number of tuples in each of the columns since
	* estimate_selectivity returns the number of estimated tuples
	* divided by the total number of tuples.
	*/
	num1_tuples = geogstats1->totalrows;
	num2_tuples = geogstats2->totalrows;

	/* Free the statistic tuples */
	free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
	ReleaseSysCache(stats1_tuple);

	free_attstatsslot(0, NULL, 0, (float *)geogstats2, geogstats2_nvalues);
	ReleaseSysCache(stats2_tuple);

	/*
	* Finally calculate the estimate of the number of rows returned
	*
	*    = 2 * (nrows from col1 + nrows from col2) /
	*	total nrows in col1 x total nrows in col2
	*
	* The factor of 2 accounts for the fact that for each tuple in
	* col 1 matching col 2,
	* there will be another match in col 2 matching col 1
	*/
	total_tuples = num1_tuples * num2_tuples;
	rows_returned = 2 * ((num1_tuples * selectivity1) + (num2_tuples * selectivity2));

	POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1);
	POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2);
	POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned);

	/*
	* One (or both) tuple count is zero...
	* We return default selectivity estimate.
	* We could probably attempt at an estimate
	* w/out looking at tables tuple count, with
	* a function of selectivity1, selectivity2.
	*/
	if ( ! total_tuples )
	{
		POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( rows_returned > total_tuples )
		PG_RETURN_FLOAT8(1.0);

	PG_RETURN_FLOAT8(rows_returned / total_tuples);
}
Ejemplo n.º 3
0
/*
 * gbfp_distance
 *
 * Computes an ordering distance between a GiST index entry's key and a
 * query fingerprint, for the strategy given as argument 2.
 *
 * Arguments read from fcinfo: 0 = GISTENTRY *, 1 = query datum (resolved
 * through SearchBitmapFPCache), 2 = strategy number.
 *
 * Returns 1.0 minus the similarity value computed for the strategy
 * (Tanimoto or Dice, per the formulas noted in the switch below).
 * Raises ERROR when the query is ISALLTRUE, when key and query signature
 * lengths differ, or when the strategy is not one of the two handled.
 */
Datum
gbfp_distance(PG_FUNCTION_ARGS)
{
    GISTENTRY      *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
    // bytea          *query = PG_GETARG_DATA_TYPE_P(1);
    StrategyNumber  strategy = (StrategyNumber) PG_GETARG_UINT16(2);
    bytea          *key = (bytea*)DatumGetPointer(entry->key);

    bytea          *query;
    double          nCommon, nCommonUp, nCommonDown, nQuery, distance;
    double          nKey = 0.0;

    /*
     * fn_extra is passed in and replaced by the call's result; the query
     * signature is received through the last out-parameter.
     * NOTE(review): presumably a per-call-site cache keyed on the query
     * datum - confirm against SearchBitmapFPCache.
     */
    fcinfo->flinfo->fn_extra = SearchBitmapFPCache(
                                                   fcinfo->flinfo->fn_extra,
                                                   fcinfo->flinfo->fn_mcxt,
                                                   PG_GETARG_DATUM(1),
                                                   NULL, NULL,&query);

    if (ISALLTRUE(query))
        elog(ERROR, "Query malformed");

    /*
    * Counts basic numbers, but don't count nKey on inner
    * page (see comments below)
    */
    nQuery = (double)sizebitvec(query);
    if (ISALLTRUE(key))
        {
        /*
         * No per-byte comparison in this case: the common-bit count is
         * taken to be the query's bit count.
         */

        if (GIST_LEAF(entry)) nKey = (double)SIGLENBIT(query);

        nCommon = nQuery;
        }
    else
        {
        int i, cnt = 0;
        unsigned char *pk = (unsigned char*)VARDATA(key),
            *pq = (unsigned char*)VARDATA(query);

        if (SIGLEN(key) != SIGLEN(query))
            elog(ERROR, "All fingerprints should be the same length");

        /* cnt = number of bits set in (key AND query) */
#ifndef USE_BUILTIN_POPCOUNT
        /* byte at a time, via the number_of_ones lookup table */
        for(i=0;i<SIGLEN(key);i++)
            cnt += number_of_ones[ pk[i] & pq[i] ];
#else
        /*
         * One unsigned int at a time with the compiler builtin, then any
         * trailing bytes that do not fill a whole unsigned int via the
         * lookup table.
         */
        unsigned eidx=SIGLEN(key)/sizeof(unsigned int);
        for(i=0;i<SIGLEN(key)/sizeof(unsigned int);++i){
          cnt += __builtin_popcount(((unsigned int *)pk)[i] & ((unsigned int *)pq)[i]);
        }
        for(i=eidx*sizeof(unsigned);i<SIGLEN(key);++i){
          cnt += number_of_ones[ pk[i] & pq[i] ];
        }
#endif        

        nCommon = (double)cnt;
        if (GIST_LEAF(entry))
            nKey = (double)sizebitvec(key);
        }

    /* Both bounds are set to the same common-bit count here. */
    nCommonUp = nCommon;
    nCommonDown = nCommon;

    /*
     * In each case below the leaf branch uses nKey; the non-leaf branch
     * does not read it (nKey stays 0.0 on non-leaf entries).
     */
    switch(strategy)
    {
        case RDKitOrderByTanimotoStrategy:
        /*
        * Nsame / (Na + Nb - Nsame)
        */
        if (GIST_LEAF(entry))
        {
            distance = nCommonUp / (nKey + nQuery - nCommonUp);
        }

        else
        {
            distance = nCommonUp / nQuery;
        }

        break;

        case RDKitOrderByDiceStrategy:
        /*
        * 2 * Nsame / (Na + Nb)
        */
        if (GIST_LEAF(entry))
        {
            distance = 2.0 * nCommonUp / (nKey + nQuery);
        }

        else
        {
            distance =  2.0 * nCommonUp / (nCommonDown + nQuery);
        }

        break;

        default:
        elog(ERROR,"Unknown strategy: %d", strategy);
    }

    /* "distance" holds a similarity up to this point; invert it. */
    PG_RETURN_FLOAT8(1.0 - distance);
}
Ejemplo n.º 4
0
/*
 *	ltreeparentsel - restriction selectivity estimate for the ltree
 *	parent ("<@") relationship.
 *
 *	Arguments read from fcinfo: 0 = PlannerInfo *root, 1 = operator Oid,
 *	2 = List *args, 3 = varRelid.  Returns a float8 probability.
 */
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	double		selec;

	/*
	 * Anything that is not "variable <@ something" or "something <@
	 * variable" gets the default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);

	if (!IsA(other, Const))
	{
		/* Not compared against a constant: nothing better than the default */
		selec = DEFAULT_PARENT_SEL;
	}
	else if (((Const *) other)->constisnull)
	{
		/*
		 * NULL constant: assume the operator is strict, so it can never
		 * return TRUE.
		 */
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	else
	{
		/* Comparison against a known non-null constant */
		Datum		constval = ((Const *) other)->constvalue;
		FmgrInfo	contproc;
		double		mcvsum;
		double		mcvsel;
		double		nullfrac;

		fmgr_info(get_opcode(operator), &contproc);

		/* Selectivity contributed by the column's most common values */
		mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
								 &mcvsum);

		/*
		 * Fraction of the histogram the constant is "<@" to, taken as
		 * representative of the non-MCV population.  A negative result
		 * means no usable histogram estimate, in which case the default
		 * is used for that population instead.
		 */
		selec = histogram_selectivity(&vardata, &contproc,
									  constval, varonleft,
									  100, 1);
		if (selec < 0)
			selec = DEFAULT_PARENT_SEL;
		else if (selec > 0.9999)
			selec = 0.9999;		/* distrust extremely large estimates */
		else if (selec < 0.0001)
			selec = 0.0001;		/* ... and extremely small ones */

		nullfrac = HeapTupleIsValid(vardata.statsTuple)
			? ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac
			: 0.0;

		/*
		 * The histogram part only describes non-null values that are not
		 * in the MCV list; scale it accordingly, then add the MCV part.
		 */
		selec *= 1.0 - nullfrac - mcvsum;
		selec += mcvsel;
	}

	ReleaseVariableStats(vardata);

	/* Should already be a valid probability, but enforce it */
	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
Ejemplo n.º 5
0
Datum geography_gist_selectivity(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	/* int varRelid = PG_GETARG_INT32(3); */
	Oid relid;
	HeapTuple stats_tuple;
	GEOG_STATS *geogstats;
	/*
	 * This is to avoid casting the corresponding
	 * "type-punned" pointer, which would break
	 * "strict-aliasing rules".
	 */
	GEOG_STATS **gsptr=&geogstats;
	int geogstats_nvalues = 0;
	Node *other;
	Var *self;
	GBOX search_box;
	float8 selectivity = 0;

	POSTGIS_DEBUG(2, "geography_gist_selectivity called");

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
	{
		POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	 * This selectivity function is invoked by a clause of the form <arg> && <arg>
	 *
	 * In typical usage, one argument will be a column reference, while the other will
	 * be a geography constant; set self to point to the column argument and other
	 * to point to the constant argument.
	 */
	other = (Node *) linitial(args);
	if ( ! IsA(other, Const) )
	{
		self = (Var *)other;
		other = (Node *) lsecond(args);
	}
	else
	{
		self = (Var *) lsecond(args);
	}

	if ( ! IsA(other, Const) )
	{
		POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	* We don't have a nice <const> && <var> or <var> && <const> 
	* situation here. <const> && <const> would probably get evaluated
	* away by PgSQL earlier on. <func> && <const> is harder, and the
	* case we get often is <const> && ST_Expand(<var>), which does 
	* actually have a subtly different selectivity than a bae
	* <const> && <var> call. It's calculatable though, by expanding
	* every cell in the histgram appropriately.
	* 
	* Discussion: http://trac.osgeo.org/postgis/ticket/1828
	*
	* To do? Do variable selectivity based on the <func> node.
	*/
	if ( ! IsA(self, Var) )
	{
		POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity");
//		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
		PG_RETURN_FLOAT8(0.33333);
	}

	/* Convert coordinates to 3D geodesic */
	search_box.flags = 1;
	FLAGS_SET_GEODETIC(search_box.flags, 1);
	if ( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
	{
		POSTGIS_DEBUG(3, " search box cannot be calculated");
		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g",
	               search_box.xmin, search_box.ymin, search_box.zmin,
	               search_box.xmax, search_box.ymax, search_box.zmax);

	/*
	 * Get pg_statistic row
	 */
	relid = getrelid(self->varno, root->parse->rtable);

	stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0);
	if ( ! stats_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default estimate");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}


	if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gsptr, &geogstats_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity");

		ReleaseSysCache(stats_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues);

	POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin);
	POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax);
	POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx);
	POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy);
	POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells);

	/*
	 * Do the estimation
	 */
	selectivity = estimate_selectivity(&search_box, geogstats);

	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);

	free_attstatsslot(0, NULL, 0, (float *)geogstats, geogstats_nvalues);
	ReleaseSysCache(stats_tuple);
	PG_RETURN_FLOAT8(selectivity);
}
Ejemplo n.º 6
0
/*
 * decibelpascal
 *
 * Takes one float8 argument and returns 10 raised to (argument / 10).
 */
Datum
decibelpascal(PG_FUNCTION_ARGS)
{
    float8   db = PG_GETARG_FLOAT8(0);
    float8   exponent = db / 10.0;

    PG_RETURN_FLOAT8( pow( 10, exponent ));
}
Ejemplo n.º 7
0
Datum
mongeelkan(PG_FUNCTION_ARGS)
{
	char		*a, *b;
	TokenList	*s, *t;
	Token		*p, *q;
	double		summatches;
	double		maxvalue;
	float8		res;

	a = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(0))));
	b = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(1))));

	if (strlen(a) > PGS_MAX_STR_LEN || strlen(b) > PGS_MAX_STR_LEN)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				errmsg("argument exceeds the maximum length of %d bytes",
					PGS_MAX_STR_LEN)));

	/* lists */
	s = initTokenList(0);
	t = initTokenList(0);

	switch (pgs_mongeelkan_tokenizer)
	{
		case PGS_UNIT_WORD:
			tokenizeBySpace(s, a);
			tokenizeBySpace(t, b);
			break;
		case PGS_UNIT_GRAM:
			tokenizeByGram(s, a);
			tokenizeByGram(t, b);
			break;
		case PGS_UNIT_CAMELCASE:
			tokenizeByCamelCase(s, a);
			tokenizeByCamelCase(t, b);
			break;
		case PGS_UNIT_ALNUM:
		default:
			tokenizeByNonAlnum(s, a);
			tokenizeByNonAlnum(t, b);
			break;
	}

	summatches = 0.0;

	p = s->head;
	while (p != NULL)
	{
		maxvalue = 0.0;

		q = t->head;
		while (q != NULL)
		{
			double val = _mongeelkan(p->data, q->data);
			elog(DEBUG3, "p: %s; q: %s", p->data, q->data);
			if (val > maxvalue)
				maxvalue = val;
			q = q->next;
		}

		summatches += maxvalue;

		p = p->next;
	}

	/* normalized and unnormalized version are the same */
	res = summatches / s->size;

	elog(DEBUG1, "is normalized: %d", pgs_mongeelkan_is_normalized);
	elog(DEBUG1, "sum matches: %.3f", summatches);
	elog(DEBUG1, "s size: %d", s->size);
	elog(DEBUG1, "medistance(%s, %s) = %.3f", a, b, res);

	destroyTokenList(s);
	destroyTokenList(t);

	PG_RETURN_FLOAT8(res);
}
Ejemplo n.º 8
0
/*
 * needlemanwunsch
 *
 * Scores two text arguments with _nwunsch() using pgs_nw_gap_penalty.
 * Returns 1.0 when both strings are empty, the raw score when
 * pgs_nw_is_normalized is off, and otherwise 1.0 - (score / maximum),
 * where both score and maximum are shifted when the minimum is negative.
 *
 * Raises ERROR if either argument is longer than PGS_MAX_STR_LEN bytes.
 */
Datum
needlemanwunsch(PG_FUNCTION_ARGS)
{
	char		*a, *b;
	size_t		len_a, len_b;
	double		lower, upper;
	float8		res;

	a = DatumGetPointer(DirectFunctionCall1(textout,
											PointerGetDatum(PG_GETARG_TEXT_P(0))));
	b = DatumGetPointer(DirectFunctionCall1(textout,
											PointerGetDatum(PG_GETARG_TEXT_P(1))));

	len_a = strlen(a);
	len_b = strlen(b);

	if (len_a > PGS_MAX_STR_LEN || len_b > PGS_MAX_STR_LEN)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("argument exceeds the maximum length of %d bytes",
						PGS_MAX_STR_LEN)));

	/* length of the longer string */
	upper = (float8) max2(len_a, len_b);

	res = (float8) _nwunsch(a, b, pgs_nw_gap_penalty);

	elog(DEBUG1, "is normalized: %d", pgs_nw_is_normalized);
	elog(DEBUG1, "maximum length: %.3f", upper);
	elog(DEBUG1, "nwdistance(%s, %s) = %.3f", a, b, res);

	/* both strings empty */
	if (upper == 0.0)
		PG_RETURN_FLOAT8(1.0);

	/* raw score requested */
	if (!pgs_nw_is_normalized)
		PG_RETURN_FLOAT8(res);

	/* FIXME normalize nw result */
	lower = upper;

	/* upper bound: longer length times the larger of the two costs */
	if (PGS_LEV_MAX_COST > pgs_nw_gap_penalty)
		upper *= PGS_LEV_MAX_COST;
	else
		upper *= pgs_nw_gap_penalty;

	/* lower bound: longer length times the smaller of the two costs */
	if (PGS_LEV_MIN_COST < pgs_nw_gap_penalty)
		lower *= PGS_LEV_MIN_COST;
	else
		lower *= pgs_nw_gap_penalty;

	/* shift the range so that it starts at zero */
	if (lower < 0.0)
	{
		upper -= lower;
		res -= lower;
	}

	/* paranoia ? */
	if (upper == 0.0)
		PG_RETURN_FLOAT8(0.0);

	res = 1.0 - (res / upper);
	elog(DEBUG1, "nw(%s, %s) = %.3f", a, b, res);
	PG_RETURN_FLOAT8(res);
}
Ejemplo n.º 9
0
/*
 * Calculates the total errors used by Error Based Pruning (EBP).
 * This will be wrapped as a plc function.
 *
 * Parameters:
 *      total:          the number of total cases represented by the node being processed.
 *      probability:    the probability to mis-classify cases represented by the child nodes
 *                      if they are pruned with EBP.
 *      conf_level:     A certainty factor to calculate the confidence limits
 *                      for the probability of error using the binomial theorem.
 * Return:
 *      The computed total error. When is_float_zero(100 - conf_level) holds,
 *      no validation or computation is done and 1.0 is returned.
 *
 * NOTE(review): check_error_value is presumably an "assert or raise error"
 * macro taking (condition, message format, value) - confirm at its definition.
 */
Datum ebp_calc_errors(PG_FUNCTION_ARGS)
{
    float8 total_cases  = PG_GETARG_FLOAT8(0);
    float8 probability  = PG_GETARG_FLOAT8(1);
    float8 conf_level   = PG_GETARG_FLOAT8(2);
    float8 result       = 1.0L;
    float8 coeff        = 0.0L;
    float8 num_errors   = 0.0L;
    unsigned int i      = 0;

    if (!is_float_zero(100 - conf_level))
    {
        check_error_value
            (
                !(conf_level < MIN_CONFIDENCE_LEVEL || conf_level > MAX_CONFIDENCE_LEVEL),
                "invalid confidence level:  %lf. Confidence level must be in range from 0.001 to 100",
                conf_level
            );

        check_error_value
            (
                total_cases > 0,
                "invalid number: %lf. The number of cases must be greater than 0",
                total_cases
            );

        check_error_value
            (
                !(probability < 0 || probability > 1),
                "invalid probability: %lf. The probability must be in range from 0 to 1",
                probability
            );

        /*
         * The confidence level arrives as a percentage (0.001 to 100 per
         * the validation message above); scale it to a fraction so that
         * it can be compared against the CONFIDENCE_LEVEL table.
         */
        conf_level = conf_level * 0.01;

        /*
         * Find the first table entry that is not below conf_level. The
         * scan is bounded by the table size so that a value above every
         * entry cannot read past the end of the array; the check below
         * then rejects it.
         */
        while (i < ARRAY_SIZE(CONFIDENCE_LEVEL) &&
               conf_level > CONFIDENCE_LEVEL[i])
            i++;

        check_error_value
            (
                i > 0 && i < ARRAY_SIZE(CONFIDENCE_LEVEL),
                "invalid value: %u. The index of confidence level must be in range from 0 to 8",
                i
            );

        /* linear interpolation between table entries i-1 and i */
        coeff = CONFIDENCE_DEV[i-1] +
                (CONFIDENCE_DEV[i] - CONFIDENCE_DEV[i-1]) *
                (conf_level - CONFIDENCE_LEVEL[i-1]) /
                (CONFIDENCE_LEVEL[i] - CONFIDENCE_LEVEL[i-1]);

        coeff *= coeff;

        check_error_value
            (
                coeff > 0,
                "invalid coefficiency: %lf. It must be greater than 0",
                coeff
            );

        num_errors = total_cases * (1 - probability);
        result = ebp_calc_errors_internal(total_cases, num_errors, conf_level, coeff) + num_errors;
    }

    PG_RETURN_FLOAT8((float8)result);
}
Ejemplo n.º 10
0
/**
 * Returns a mean from an array of numbers.
 * by Paul A. Jungwirth
 *
 * Accepts a one-dimensional array of SMALLINT, INTEGER, BIGINT, REAL or
 * DOUBLE PRECISION without null elements. Returns NULL for an array with
 * zero dimensions or zero length; raises ERROR for a NULL argument, more
 * than one dimension, null elements, or any other element type.
 */
Datum
array_to_mean(PG_FUNCTION_ARGS)
{
  ArrayType *vals;       // the input array
  Oid elemType;          // element type of the input array
  int16 elemWidth;       // element type width
  bool elemByValue;      // element type "is passed by value" flag
  char elemAlign;        // element type alignment code
  Datum *elems;          // array contents as Datum objects
  bool *elemNulls;       // per-element "is null" flags (not used)
  int count;             // number of elements
  float8 mean = 0;
  int i;

  if (PG_ARGISNULL(0)) {
    ereport(ERROR, (errmsg("Null arrays not accepted")));
  }

  vals = PG_GETARG_ARRAYTYPE_P(0);

  if (ARR_NDIM(vals) == 0) {
    PG_RETURN_NULL();
  }
  if (ARR_NDIM(vals) > 1) {
    ereport(ERROR, (errmsg("One-dimesional arrays are required")));
  }

  if (array_contains_nulls(vals)) {
    ereport(ERROR, (errmsg("Array contains null elements")));
  }

  // Only the five numeric element types below are supported.
  elemType = ARR_ELEMTYPE(vals);

  switch (elemType) {
    case INT2OID:
    case INT4OID:
    case INT8OID:
    case FLOAT4OID:
    case FLOAT8OID:
      break;
    default:
      ereport(ERROR, (errmsg("Mean subject must be SMALLINT, INTEGER, BIGINT, REAL, or DOUBLE PRECISION values")));
      break;
  }

  count = (ARR_DIMS(vals))[0];

  if (count == 0) PG_RETURN_NULL();

  get_typlenbyvalalign(elemType, &elemWidth, &elemByValue, &elemAlign);

  // Extract the array contents (as Datum objects).
  deconstruct_array(vals, elemType, elemWidth, elemByValue, elemAlign,
&elems, &elemNulls, &count);

  // Running mean: each step moves the mean toward the next element by
  // 1/(i+1) of the difference, so no large sum is ever accumulated.
  // http://stackoverflow.com/questions/1930454/what-is-a-good-solution-for-calculating-an-average-where-the-sum-of-all-values-e/1934266#1934266
  for (i = 0; i < count; i++) {
    float8 x = 0;

    // Convert the element to float8 according to its type.
    switch (elemType) {
      case INT2OID:
        x = DatumGetInt16(elems[i]);
        break;
      case INT4OID:
        x = DatumGetInt32(elems[i]);
        break;
      case INT8OID:
        x = DatumGetInt64(elems[i]);
        break;
      case FLOAT4OID:
        x = DatumGetFloat4(elems[i]);
        break;
      case FLOAT8OID:
        x = DatumGetFloat8(elems[i]);
        break;
      default:
        ereport(ERROR, (errmsg("Mean subject must be SMALLINT, INTEGER, BIGINT, REAL, or DOUBLE PRECISION values")));
        break;
    }

    mean += (x - mean) / (i + 1);
  }

  PG_RETURN_FLOAT8(mean);
}
Ejemplo n.º 11
0
/*
 * array_mad
 *
 * Returns 1.486 times the median of the absolute deviations from the
 * median of a one-dimensional array without null elements.
 *
 * Raises ERROR for a NULL argument, for an array that is not
 * one-dimensional, or for an array containing null elements.
 *
 * NOTE(review): every element is read with DatumGetFloat4 and the element
 * type is never checked, so this appears to assume a REAL (float4) array -
 * confirm against the SQL declaration of this function.
 */
Datum array_mad(PG_FUNCTION_ARGS) {
	// The formal PostgreSQL array object
	ArrayType *array;

	// The array element type
	Oid arrayElementType;

	// The array element type width
	int16 arrayElementTypeWidth;

	// The array element type "is passed by value" flags (not used, should always be true)
	bool arrayElementTypeByValue;

	// The array element type alignment codes (not used)
	char arrayElementTypeAlignmentCode;

	// The array contents, as PostgreSQL "datum" objects
	Datum *arrayContent;

	// List of "is null" flags for the array contents
	bool *arrayNullFlags;

	// The number of elements in the array
	int arrayLength;

	int i;
	double median, mad;

	// Working copy of the values, as doubles
	double *inarray;

	if (PG_ARGISNULL(0))
		ereport(ERROR, (errmsg("Null arrays not accepted")));

	// Get array from input
	array = PG_GETARG_ARRAYTYPE_P(0);

	if (ARR_NDIM(array) != 1)
		ereport(ERROR, (errmsg("One-dimesional arrays are required")));

	if (array_contains_nulls(array))
		ereport(ERROR, (errmsg("Array contains null elements")));

	arrayLength = (ARR_DIMS(array))[0];
	arrayElementType = ARR_ELEMTYPE(array);
	get_typlenbyvalalign(arrayElementType, &arrayElementTypeWidth, &arrayElementTypeByValue, &arrayElementTypeAlignmentCode);
	deconstruct_array(array, arrayElementType, arrayElementTypeWidth, arrayElementTypeByValue, arrayElementTypeAlignmentCode, &arrayContent, &arrayNullFlags, &arrayLength);

	// Allocate with palloc rather than malloc: an allocation failure is
	// reported as an ERROR instead of yielding NULL, and the buffer belongs
	// to the current memory context instead of leaking on every call.
	inarray = (double *) palloc(arrayLength * sizeof(double));
	for (i=0; i<arrayLength; i++) {
		inarray[i] = DatumGetFloat4(arrayContent[i]);
	}

	gsl_sort (inarray, 1, arrayLength);

	median = gsl_stats_median_from_sorted_data (inarray, 1, arrayLength);

	// Replace each value by its absolute deviation from the median,
	// then take the median of those deviations.
	for (i=0; i<arrayLength; i++) {
		inarray[i] = fabs(inarray[i]-median);
	}
	gsl_sort (inarray, 1, arrayLength);
	mad = 1.486 * gsl_stats_median_from_sorted_data (inarray, 1, arrayLength);

	pfree(inarray);

	PG_RETURN_FLOAT8(mad);
}
Ejemplo n.º 12
0
/*
 * overlapcoefficient
 *
 * Tokenizes both text arguments into sets with the tokenizer selected by
 * pgs_overlap_tokenizer and returns, as a float8, the number of tokens
 * the two sets share divided by the size of the smaller set.
 *
 * The shared-token count is derived as |A| + |B| - |A union B|, where the
 * union is obtained by tokenizing the second string into the first set.
 *
 * When the smaller set is empty the ratio is undefined; instead of
 * dividing by zero (which produced NaN) the result is 1.0 if both sets
 * are empty and 0.0 if only one of them is.
 *
 * Raises ERROR if either argument is longer than PGS_MAX_STR_LEN bytes.
 */
Datum
overlapcoefficient(PG_FUNCTION_ARGS)
{
	char		*a, *b;
	TokenList	*s, *t;
	int		atok, btok, comtok, alltok;
	int		mintok;
	float8		res;

	a = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(0))));
	b = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(1))));

	if (strlen(a) > PGS_MAX_STR_LEN || strlen(b) > PGS_MAX_STR_LEN)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				errmsg("argument exceeds the maximum length of %d bytes",
					PGS_MAX_STR_LEN)));

	/* sets */
	s = initTokenList(1);
	t = initTokenList(1);

	switch (pgs_overlap_tokenizer)
	{
		case PGS_UNIT_WORD:
			tokenizeBySpace(s, a);
			tokenizeBySpace(t, b);
			break;
		case PGS_UNIT_GRAM:
			tokenizeByGram(s, a);
			tokenizeByGram(t, b);
			break;
		case PGS_UNIT_CAMELCASE:
			tokenizeByCamelCase(s, a);
			tokenizeByCamelCase(t, b);
			break;
		case PGS_UNIT_ALNUM:	/* default */
		default:
			tokenizeByNonAlnum(s, a);
			tokenizeByNonAlnum(t, b);
			break;
	}

	elog(DEBUG3, "Token List A");
	printToken(s);
	elog(DEBUG3, "Token List B");
	printToken(t);

	atok = s->size;
	btok = t->size;

	/* combine the sets */
	switch (pgs_overlap_tokenizer)
	{
		case PGS_UNIT_WORD:
			tokenizeBySpace(s, b);
			break;
		case PGS_UNIT_GRAM:
			tokenizeByGram(s, b);
			break;
		case PGS_UNIT_CAMELCASE:
			tokenizeByCamelCase(s, b);
			break;
		case PGS_UNIT_ALNUM:	/* default */
		default:
			tokenizeByNonAlnum(s, b);
			break;
	}

	elog(DEBUG3, "All Token List");
	printToken(s);

	alltok = s->size;

	destroyTokenList(s);
	destroyTokenList(t);

	comtok = atok + btok - alltok;

	mintok = min2(atok, btok);

	elog(DEBUG1, "is normalized: %d", pgs_overlap_is_normalized);
	elog(DEBUG1, "token list A size: %d", atok);
	elog(DEBUG1, "token list B size: %d", btok);
	elog(DEBUG1, "all tokens size: %d", alltok);
	elog(DEBUG1, "common tokens size: %d", comtok);
	elog(DEBUG1, "min between A and B sizes: %d", mintok);

	/* normalized and unnormalized version are the same */
	if (mintok == 0)
	{
		/* at least one set is empty: avoid dividing by zero */
		res = (atok == btok) ? 1.0 : 0.0;
	}
	else
		res = (float8) comtok / mintok;

	PG_RETURN_FLOAT8(res);
}
Ejemplo n.º 13
0
/*
 * pascals
 *
 * Fetches argument 0 through PG_GETARG_DECIBEL and returns it as a float8.
 */
Datum
pascals(PG_FUNCTION_ARGS)
{
    float8   value;

    value = PG_GETARG_DECIBEL(0);

    PG_RETURN_FLOAT8( value );
}
Ejemplo n.º 14
0
/*
 * pascaldecibel
 *
 * Takes one float8 argument and returns 10 times its base-10 logarithm.
 */
Datum
pascaldecibel(PG_FUNCTION_ARGS)
{
    float8   pa = PG_GETARG_FLOAT8(0);
    float8   db = 10 * log10(pa);

    PG_RETURN_FLOAT8( db );
}
Ejemplo n.º 15
0
/*
 * BOX3D_ymin
 *
 * Returns the smaller of the box's ymin and ymax fields, so the result is
 * the lower Y bound even if the two fields are stored in reverse order.
 */
Datum BOX3D_ymin(PG_FUNCTION_ARGS)
{
	BOX3D *b3d = (BOX3D *)PG_GETARG_POINTER(0);

	if (b3d->ymin < b3d->ymax)
		PG_RETURN_FLOAT8(b3d->ymin);

	PG_RETURN_FLOAT8(b3d->ymax);
}
Ejemplo n.º 16
0
 /*
  * spherecircle_circ
  *
  * Returns PID multiplied by the sine of the circle's radius.
  */
 Datum spherecircle_circ (PG_FUNCTION_ARGS)
 {
   SCIRCLE  * circle  =  ( SCIRCLE * ) PG_GETARG_POINTER ( 0 ) ;
   double     sin_r   =  sin ( circle->radius ) ;

   PG_RETURN_FLOAT8 ( PID * sin_r );
 }
Ejemplo n.º 17
0
/*
 * BOX3D_zmax
 *
 * Returns the larger of the box's zmin and zmax fields, so the result is
 * the upper Z bound even if the two fields are stored in reverse order.
 */
Datum BOX3D_zmax(PG_FUNCTION_ARGS)
{
	BOX3D *b3d = (BOX3D *)PG_GETARG_POINTER(0);

	if (b3d->zmin > b3d->zmax)
		PG_RETURN_FLOAT8(b3d->zmin);

	PG_RETURN_FLOAT8(b3d->zmax);
}
Ejemplo n.º 18
0
/*
 * Restriction selectivity estimator for 2D geometry columns.
 *
 * Arguments read from fcinfo: 0 = PlannerInfo *root, 2 = List *args.
 * Arguments 1 (operator Oid) and 3 (varRelid) are not used.
 *
 * Returns a float8 selectivity:
 *  - DEFAULT_GEOMETRY_SEL when the clause is not binary, when neither
 *    argument is a Const, when the other argument is not a Var, or when
 *    statistics are unavailable;
 *  - 0.0 when no box can be obtained from the constant;
 *  - otherwise the value computed by estimate_selectivity().
 */
Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	/* int varRelid = PG_GETARG_INT32(3); */
	Oid relid;
	HeapTuple stats_tuple;
	GEOM_STATS *geomstats;
	/*
	 * This is to avoid casting the corresponding
	 * "type-punned" pointer, which would break
	 * "strict-aliasing rules".
	 */
	GEOM_STATS **gsptr=&geomstats;
	int geomstats_nvalues=0;
	Node *other;
	Var *self;
	GBOX search_box;
	float8 selectivity=0;

	POSTGIS_DEBUG(2, "geometry_gist_sel called");

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
	{
		POSTGIS_DEBUG(3, "geometry_gist_sel: not a binary opclause");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}


	/*
	 * Find the constant part
	 */
	other = (Node *) linitial(args);
	if ( ! IsA(other, Const) )
	{
		self = (Var *)other;
		other = (Node *) lsecond(args);
	}
	else
	{
		self = (Var *) lsecond(args);
	}

	if ( ! IsA(other, Const) )
	{
		POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	/*
	 * We are working on two constants..
	 * TODO: check if expression is true,
	 *       returned set would be either
	 *       the whole or none.
	 */
	if ( ! IsA(self, Var) )
	{
		POSTGIS_DEBUG(3, " no variable argument ? - returning default selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	/*
	 * Convert the constant to a BOX
	 */

	if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
	{
		POSTGIS_DEBUG(3, "search box is EMPTY");
		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);

	/*
	 * Get pg_statistic row
	 */

	relid = getrelid(self->varno, root->parse->rtable);

	stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0);
	if ( ! stats_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default estimate");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}


	if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gsptr, &geomstats_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity");

		ReleaseSysCache(stats_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues);

	POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f",
	               geomstats->xmin, geomstats->ymin);
	POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f",
	               geomstats->xmax, geomstats->ymax);
	/* Each label now names the field it prints (they were swapped) */
	POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->cols);
	POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->rows);
	POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);

	/*
	 * Do the estimation
	 */
	selectivity = estimate_selectivity(&search_box, geomstats);


	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);

	/* Give back the statistics slot and the syscache reference */
	free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues);
	ReleaseSysCache(stats_tuple);
	PG_RETURN_FLOAT8(selectivity);

}