/*
 * Return the checkpoint_sync_time field of the structure handed back by
 * pgstat_fetch_global(), presented as a float8.
 */
Datum
pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
{
	/* the stored value is already in milliseconds; only widen it to double */
	double		sync_msec = (double) pgstat_fetch_global()->checkpoint_sync_time;

	PG_RETURN_FLOAT8(sync_msec);
}
Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); /* Oid operator = PG_GETARG_OID(1); */ List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); Node *arg1, *arg2; Var *var1, *var2; Oid relid1, relid2; HeapTuple stats1_tuple, stats2_tuple; GEOG_STATS *geogstats1, *geogstats2; /* * These are to avoid casting the corresponding * "type-punned" pointers, which would break * "strict-aliasing rules". */ GEOG_STATS **gs1ptr=&geogstats1, **gs2ptr=&geogstats2; int geogstats1_nvalues = 0, geogstats2_nvalues = 0; float8 selectivity1 = 0.0, selectivity2 = 0.0; float4 num1_tuples = 0.0, num2_tuples = 0.0; float4 total_tuples = 0.0, rows_returned = 0.0; GBOX search_box; /** * Join selectivity algorithm. To calculation the selectivity we * calculate the intersection of the two column sample extents, * sum the results, and then multiply by two since for each * geometry in col 1 that intersects a geometry in col 2, the same * will also be true. 
*/ POSTGIS_DEBUGF(3, "geography_gist_join_selectivity called with jointype %d", jointype); /* * We'll only respond to an inner join/unknown context join */ if (jointype != JOIN_INNER) { elog(NOTICE, "geography_gist_join_selectivity called with incorrect join type"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* * Determine the oids of the geometry columns we are working with */ arg1 = (Node *) linitial(args); arg2 = (Node *) lsecond(args); if (!IsA(arg1, Var) || !IsA(arg2, Var)) { elog(DEBUG1, "geography_gist_join_selectivity called with arguments that are not column references"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } var1 = (Var *)arg1; var2 = (Var *)arg2; relid1 = getrelid(var1->varno, root->parse->rtable); relid2 = getrelid(var2->varno, root->parse->rtable); POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2); /* Read the stats tuple from the first column */ stats1_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno), 0, 0); if ( ! stats1_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gs1ptr, &geogstats1_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity"); ReleaseSysCache(stats1_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* Read the stats tuple from the second column */ stats2_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno), 0, 0); if ( ! stats2_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); ReleaseSysCache(stats1_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( ! 
get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gs2ptr, &geogstats2_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity"); free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); ReleaseSysCache(stats2_tuple); ReleaseSysCache(stats1_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /** * Setup the search box - this is the intersection of the two column * extents. */ search_box.xmin = Max(geogstats1->xmin, geogstats2->xmin); search_box.ymin = Max(geogstats1->ymin, geogstats2->ymin); search_box.zmin = Max(geogstats1->zmin, geogstats2->zmin); search_box.xmax = Min(geogstats1->xmax, geogstats2->xmax); search_box.ymax = Min(geogstats1->ymax, geogstats2->ymax); search_box.zmax = Min(geogstats1->zmax, geogstats2->zmax); /* If the extents of the two columns don't intersect, return zero */ if (search_box.xmin > search_box.xmax || search_box.ymin > search_box.ymax || search_box.zmin > search_box.zmax) PG_RETURN_FLOAT8(0.0); POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats1->xmin, geogstats1->ymin, geogstats1->zmin, geogstats1->xmax, geogstats1->ymax, geogstats1->zmax); POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats2->xmin, geogstats2->ymin, geogstats2->zmin, geogstats2->xmax, geogstats2->ymax, geogstats2->zmax); POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax); /* Do the selectivity */ selectivity1 = estimate_selectivity(&search_box, geogstats1); selectivity2 = estimate_selectivity(&search_box, geogstats2); POSTGIS_DEBUGF(3, "selectivity1: %.15g selectivity2: %.15g", selectivity1, selectivity2); /* * OK, so before we calculate the join selectivity we also need to * know the 
number of tuples in each of the columns since * estimate_selectivity returns the number of estimated tuples * divided by the total number of tuples. */ num1_tuples = geogstats1->totalrows; num2_tuples = geogstats2->totalrows; /* Free the statistic tuples */ free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); ReleaseSysCache(stats1_tuple); free_attstatsslot(0, NULL, 0, (float *)geogstats2, geogstats2_nvalues); ReleaseSysCache(stats2_tuple); /* * Finally calculate the estimate of the number of rows returned * * = 2 * (nrows from col1 + nrows from col2) / * total nrows in col1 x total nrows in col2 * * The factor of 2 accounts for the fact that for each tuple in * col 1 matching col 2, * there will be another match in col 2 matching col 1 */ total_tuples = num1_tuples * num2_tuples; rows_returned = 2 * ((num1_tuples * selectivity1) + (num2_tuples * selectivity2)); POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1); POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2); POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned); /* * One (or both) tuple count is zero... * We return default selectivity estimate. * We could probably attempt at an estimate * w/out looking at tables tuple count, with * a function of selectivity1, selectivity2. */ if ( ! total_tuples ) { POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( rows_returned > total_tuples ) PG_RETURN_FLOAT8(1.0); PG_RETURN_FLOAT8(rows_returned / total_tuples); }
Datum gbfp_distance(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); // bytea *query = PG_GETARG_DATA_TYPE_P(1); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); bytea *key = (bytea*)DatumGetPointer(entry->key); bytea *query; double nCommon, nCommonUp, nCommonDown, nQuery, distance; double nKey = 0.0; fcinfo->flinfo->fn_extra = SearchBitmapFPCache( fcinfo->flinfo->fn_extra, fcinfo->flinfo->fn_mcxt, PG_GETARG_DATUM(1), NULL, NULL,&query); if (ISALLTRUE(query)) elog(ERROR, "Query malformed"); /* * Counts basic numbers, but don't count nKey on inner * page (see comments below) */ nQuery = (double)sizebitvec(query); if (ISALLTRUE(key)) { if (GIST_LEAF(entry)) nKey = (double)SIGLENBIT(query); nCommon = nQuery; } else { int i, cnt = 0; unsigned char *pk = (unsigned char*)VARDATA(key), *pq = (unsigned char*)VARDATA(query); if (SIGLEN(key) != SIGLEN(query)) elog(ERROR, "All fingerprints should be the same length"); #ifndef USE_BUILTIN_POPCOUNT for(i=0;i<SIGLEN(key);i++) cnt += number_of_ones[ pk[i] & pq[i] ]; #else unsigned eidx=SIGLEN(key)/sizeof(unsigned int); for(i=0;i<SIGLEN(key)/sizeof(unsigned int);++i){ cnt += __builtin_popcount(((unsigned int *)pk)[i] & ((unsigned int *)pq)[i]); } for(i=eidx*sizeof(unsigned);i<SIGLEN(key);++i){ cnt += number_of_ones[ pk[i] & pq[i] ]; } #endif nCommon = (double)cnt; if (GIST_LEAF(entry)) nKey = (double)sizebitvec(key); } nCommonUp = nCommon; nCommonDown = nCommon; switch(strategy) { case RDKitOrderByTanimotoStrategy: /* * Nsame / (Na + Nb - Nsame) */ if (GIST_LEAF(entry)) { distance = nCommonUp / (nKey + nQuery - nCommonUp); } else { distance = nCommonUp / nQuery; } break; case RDKitOrderByDiceStrategy: /* * 2 * Nsame / (Na + Nb) */ if (GIST_LEAF(entry)) { distance = 2.0 * nCommonUp / (nKey + nQuery); } else { distance = 2.0 * nCommonUp / (nCommonDown + nQuery); } break; default: elog(ERROR,"Unknown strategy: %d", strategy); } PG_RETURN_FLOAT8(1.0 - distance); }
/* * ltreeparentsel - Selectivity of parent relationship for ltree data types. */ Datum ltreeparentsel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; double selec; /* * If expression is not variable <@ something or something <@ variable, * then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL); /* * If the something is a NULL constant, assume operator is strict and * return zero, ie, operator will never return TRUE. */ if (IsA(other, Const) && ((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } if (IsA(other, Const)) { /* Variable is being compared to a known non-null constant */ Datum constval = ((Const *) other)->constvalue; FmgrInfo contproc; double mcvsum; double mcvsel; double nullfrac; fmgr_info(get_opcode(operator), &contproc); /* * Is the constant "<@" to any of the column's most common values? */ mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft, &mcvsum); /* * If the histogram is large enough, see what fraction of it the * constant is "<@" to, and assume that's representative of the * non-MCV population. Otherwise use the default selectivity for the * non-MCV population. */ selec = histogram_selectivity(&vardata, &contproc, constval, varonleft, 100, 1); if (selec < 0) { /* Nope, fall back on default */ selec = DEFAULT_PARENT_SEL; } else { /* Yes, but don't believe extremely small or large estimates. 
*/ if (selec < 0.0001) selec = 0.0001; else if (selec > 0.9999) selec = 0.9999; } if (HeapTupleIsValid(vardata.statsTuple)) nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac; else nullfrac = 0.0; /* * Now merge the results from the MCV and histogram calculations, * realizing that the histogram covers only the non-null values that * are not listed in MCV. */ selec *= 1.0 - nullfrac - mcvsum; selec += mcvsel; } else selec = DEFAULT_PARENT_SEL; ReleaseVariableStats(vardata); /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
Datum geography_gist_selectivity(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); /* Oid operator = PG_GETARG_OID(1); */ List *args = (List *) PG_GETARG_POINTER(2); /* int varRelid = PG_GETARG_INT32(3); */ Oid relid; HeapTuple stats_tuple; GEOG_STATS *geogstats; /* * This is to avoid casting the corresponding * "type-punned" pointer, which would break * "strict-aliasing rules". */ GEOG_STATS **gsptr=&geogstats; int geogstats_nvalues = 0; Node *other; Var *self; GBOX search_box; float8 selectivity = 0; POSTGIS_DEBUG(2, "geography_gist_selectivity called"); /* Fail if not a binary opclause (probably shouldn't happen) */ if (list_length(args) != 2) { POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* * This selectivity function is invoked by a clause of the form <arg> && <arg> * * In typical usage, one argument will be a column reference, while the other will * be a geography constant; set self to point to the column argument and other * to point to the constant argument. */ other = (Node *) linitial(args); if ( ! IsA(other, Const) ) { self = (Var *)other; other = (Node *) lsecond(args); } else { self = (Var *) lsecond(args); } if ( ! IsA(other, Const) ) { POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } /* * We don't have a nice <const> && <var> or <var> && <const> * situation here. <const> && <const> would probably get evaluated * away by PgSQL earlier on. <func> && <const> is harder, and the * case we get often is <const> && ST_Expand(<var>), which does * actually have a subtly different selectivity than a bae * <const> && <var> call. It's calculatable though, by expanding * every cell in the histgram appropriately. * * Discussion: http://trac.osgeo.org/postgis/ticket/1828 * * To do? Do variable selectivity based on the <func> node. */ if ( ! 
IsA(self, Var) ) { POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity"); // PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); PG_RETURN_FLOAT8(0.33333); } /* Convert coordinates to 3D geodesic */ search_box.flags = 1; FLAGS_SET_GEODETIC(search_box.flags, 1); if ( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) ) { POSTGIS_DEBUG(3, " search box cannot be calculated"); PG_RETURN_FLOAT8(0.0); } POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax); /* * Get pg_statistic row */ relid = getrelid(self->varno, root->parse->rtable); stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0); if ( ! stats_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default estimate"); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gsptr, &geogstats_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity"); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); } POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues); POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin); POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax); POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx); POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy); POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz); POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage); POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells); /* * Do the estimation */ selectivity = estimate_selectivity(&search_box, geogstats); 
POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity); free_attstatsslot(0, NULL, 0, (float *)geogstats, geogstats_nvalues); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(selectivity); }
/*
 * Return 10 raised to the power (argument / 10).
 * NOTE(review): by its name this converts a decibel value to a linear one - confirm.
 */
Datum
decibelpascal(PG_FUNCTION_ARGS)
{
	float8		level = PG_GETARG_FLOAT8(0);
	float8		exponent = level / 10.0;

	PG_RETURN_FLOAT8(pow(10, exponent));
}
Datum mongeelkan(PG_FUNCTION_ARGS) { char *a, *b; TokenList *s, *t; Token *p, *q; double summatches; double maxvalue; float8 res; a = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(0)))); b = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(1)))); if (strlen(a) > PGS_MAX_STR_LEN || strlen(b) > PGS_MAX_STR_LEN) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("argument exceeds the maximum length of %d bytes", PGS_MAX_STR_LEN))); /* lists */ s = initTokenList(0); t = initTokenList(0); switch (pgs_mongeelkan_tokenizer) { case PGS_UNIT_WORD: tokenizeBySpace(s, a); tokenizeBySpace(t, b); break; case PGS_UNIT_GRAM: tokenizeByGram(s, a); tokenizeByGram(t, b); break; case PGS_UNIT_CAMELCASE: tokenizeByCamelCase(s, a); tokenizeByCamelCase(t, b); break; case PGS_UNIT_ALNUM: default: tokenizeByNonAlnum(s, a); tokenizeByNonAlnum(t, b); break; } summatches = 0.0; p = s->head; while (p != NULL) { maxvalue = 0.0; q = t->head; while (q != NULL) { double val = _mongeelkan(p->data, q->data); elog(DEBUG3, "p: %s; q: %s", p->data, q->data); if (val > maxvalue) maxvalue = val; q = q->next; } summatches += maxvalue; p = p->next; } /* normalized and unnormalized version are the same */ res = summatches / s->size; elog(DEBUG1, "is normalized: %d", pgs_mongeelkan_is_normalized); elog(DEBUG1, "sum matches: %.3f", summatches); elog(DEBUG1, "s size: %d", s->size); elog(DEBUG1, "medistance(%s, %s) = %.3f", a, b, res); destroyTokenList(s); destroyTokenList(t); PG_RETURN_FLOAT8(res); }
Datum needlemanwunsch(PG_FUNCTION_ARGS) { char *a, *b; double minvalue, maxvalue; float8 res; a = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(0)))); b = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(1)))); if (strlen(a) > PGS_MAX_STR_LEN || strlen(b) > PGS_MAX_STR_LEN) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("argument exceeds the maximum length of %d bytes", PGS_MAX_STR_LEN))); maxvalue = (float8) max2(strlen(a), strlen(b)); res = (float8) _nwunsch(a, b, pgs_nw_gap_penalty); elog(DEBUG1, "is normalized: %d", pgs_nw_is_normalized); elog(DEBUG1, "maximum length: %.3f", maxvalue); elog(DEBUG1, "nwdistance(%s, %s) = %.3f", a, b, res); if (maxvalue == 0.0) PG_RETURN_FLOAT8(1.0); else if (pgs_nw_is_normalized) { /* FIXME normalize nw result */ minvalue = maxvalue; if (PGS_LEV_MAX_COST > pgs_nw_gap_penalty) maxvalue *= PGS_LEV_MAX_COST; else maxvalue *= pgs_nw_gap_penalty; if (PGS_LEV_MIN_COST < pgs_nw_gap_penalty) minvalue *= PGS_LEV_MIN_COST; else minvalue *= pgs_nw_gap_penalty; if (minvalue < 0.0) { maxvalue -= minvalue; res -= minvalue; } /* paranoia ? */ if (maxvalue == 0.0) PG_RETURN_FLOAT8(0.0); else { res = 1.0 - (res / maxvalue); elog(DEBUG1, "nw(%s, %s) = %.3f", a, b, res); PG_RETURN_FLOAT8(res); } } else PG_RETURN_FLOAT8(res); }
/* * Calculates the total errors used by Error Based Pruning (EBP). * This will be wrapped as a plc function. * * Parameters: * total: the number of total cases represented by the node being processed. * probability: the probability to mis-classify cases represented by the child nodes * if they are pruned with EBP. * conf_level: A certainty factor to calculate the confidence limits * for the probability of error using the binomial theorem. * Return: * The computed total error */ Datum ebp_calc_errors(PG_FUNCTION_ARGS) { float8 total_cases = PG_GETARG_FLOAT8(0); float8 probability = PG_GETARG_FLOAT8(1); float8 conf_level = PG_GETARG_FLOAT8(2); float8 result = 1.0L; float8 coeff = 0.0L; unsigned int i = 0; if (!is_float_zero(100 - conf_level)) { check_error_value ( !(conf_level < MIN_CONFIDENCE_LEVEL || conf_level > MAX_CONFIDENCE_LEVEL), "invalid confidence level: %lf. Confidence level must be in range from 0.001 to 100", conf_level ); check_error_value ( total_cases > 0, "invalid number: %lf. The number of cases must be greater than 0", total_cases ); check_error_value ( !(probability < 0 || probability > 1), "invalid probability: %lf. The probability must be in range from 0 to 1", probability ); /* * confidence level value is in range from 0.001 to 1.0 for API c45_train * it should be divided by 100 when calculate addition error. * Therefore, the range of conf_level here is [0.00001, 1.0]. */ conf_level = conf_level * 0.01; /* since the conf_level is in [0.00001, 1.0], the i will be in [1, length(CONFIDENCE_LEVEL) - 1]*/ while (conf_level > CONFIDENCE_LEVEL[i]) i++; check_error_value ( i > 0 && i < ARRAY_SIZE(CONFIDENCE_LEVEL), "invalid value: %d. The index of confidence level must be in range from 0 to 8", i ); coeff = CONFIDENCE_DEV[i-1] + (CONFIDENCE_DEV[i] - CONFIDENCE_DEV[i-1]) * (conf_level - CONFIDENCE_LEVEL[i-1]) / (CONFIDENCE_LEVEL[i] - CONFIDENCE_LEVEL[i-1]); coeff *= coeff; check_error_value ( coeff > 0, "invalid coefficiency: %lf. 
It must be greater than 0", coeff ); float8 num_errors = total_cases * (1 - probability); result = ebp_calc_errors_internal(total_cases, num_errors, conf_level, coeff) + num_errors; } PG_RETURN_FLOAT8((float8)result); }
/** * Returns a mean from an array of numbers. * by Paul A. Jungwirth */ Datum array_to_mean(PG_FUNCTION_ARGS) { // Our arguments: ArrayType *vals; // The array element type: Oid valsType; // The array element type widths for our input array: int16 valsTypeWidth; // The array element type "is passed by value" flags (not really used): bool valsTypeByValue; // The array element type alignment codes (not really used): char valsTypeAlignmentCode; // The array contents, as PostgreSQL "Datum" objects: Datum *valsContent; // List of "is null" flags for the array contents (not used): bool *valsNullFlags; // The size of the input array: int valsLength; float8 v = 0; int i; if (PG_ARGISNULL(0)) { ereport(ERROR, (errmsg("Null arrays not accepted"))); } vals = PG_GETARG_ARRAYTYPE_P(0); if (ARR_NDIM(vals) == 0) { PG_RETURN_NULL(); } if (ARR_NDIM(vals) > 1) { ereport(ERROR, (errmsg("One-dimesional arrays are required"))); } if (array_contains_nulls(vals)) { ereport(ERROR, (errmsg("Array contains null elements"))); } // Determine the array element types. valsType = ARR_ELEMTYPE(vals); if (valsType != INT2OID && valsType != INT4OID && valsType != INT8OID && valsType != FLOAT4OID && valsType != FLOAT8OID) { ereport(ERROR, (errmsg("Mean subject must be SMALLINT, INTEGER, BIGINT, REAL, or DOUBLE PRECISION values"))); } valsLength = (ARR_DIMS(vals))[0]; if (valsLength == 0) PG_RETURN_NULL(); get_typlenbyvalalign(valsType, &valsTypeWidth, &valsTypeByValue, &valsTypeAlignmentCode); // Extract the array contents (as Datum objects). 
deconstruct_array(vals, valsType, valsTypeWidth, valsTypeByValue, valsTypeAlignmentCode, &valsContent, &valsNullFlags, &valsLength); // Iterate through the contents and sum things up, // then return the mean: // Watch out for overflow: // http://stackoverflow.com/questions/1930454/what-is-a-good-solution-for-calculating-an-average-where-the-sum-of-all-values-e/1934266#1934266 switch (valsType) { case INT2OID: for (i = 0; i < valsLength; i++) { v += (DatumGetInt16(valsContent[i]) - v) / (i + 1); } break; case INT4OID: for (i = 0; i < valsLength; i++) { v += (DatumGetInt32(valsContent[i]) - v) / (i + 1); } break; case INT8OID: for (i = 0; i < valsLength; i++) { v += (DatumGetInt64(valsContent[i]) - v) / (i + 1); } break; case FLOAT4OID: for (i = 0; i < valsLength; i++) { v += (DatumGetFloat4(valsContent[i]) - v) / (i + 1); } break; case FLOAT8OID: for (i = 0; i < valsLength; i++) { v += (DatumGetFloat8(valsContent[i]) - v) / (i + 1); } break; default: ereport(ERROR, (errmsg("Mean subject must be SMALLINT, INTEGER, BIGINT, REAL, or DOUBLE PRECISION values"))); break; } PG_RETURN_FLOAT8(v); }
Datum array_mad(PG_FUNCTION_ARGS) { // The formal PostgreSQL array object ArrayType *array; // The array element type Oid arrayElementType; // The array element type width int16 arrayElementTypeWidth; // The array element type "is passed by value" flags (not used, should always be true) bool arrayElementTypeByValue; // The array element type alignment codes (not used) char arrayElementTypeAlignmentCode; // The array contents, as PostgreSQL "datum" objects Datum *arrayContent; // List of "is null" flags for the array contents bool *arrayNullFlags; // The size of each array int arrayLength; int i,j, nelem; double median, mad; double *inarray; if (PG_ARGISNULL(0)) ereport(ERROR, (errmsg("Null arrays not accepted"))); // Get array from input array = PG_GETARG_ARRAYTYPE_P(0); if (ARR_NDIM(array) != 1) ereport(ERROR, (errmsg("One-dimesional arrays are required"))); if (array_contains_nulls(array)) ereport(ERROR, (errmsg("Array contains null elements"))); arrayLength = (ARR_DIMS(array))[0]; arrayElementType = ARR_ELEMTYPE(array); get_typlenbyvalalign(arrayElementType, &arrayElementTypeWidth, &arrayElementTypeByValue, &arrayElementTypeAlignmentCode); deconstruct_array(array, arrayElementType, arrayElementTypeWidth, arrayElementTypeByValue, arrayElementTypeAlignmentCode, &arrayContent, &arrayNullFlags, &arrayLength); inarray = (double*)malloc(arrayLength*sizeof(double)); for (i=0; i<arrayLength; i++) { inarray[i] = DatumGetFloat4(arrayContent[i]); } gsl_sort (inarray, 1, arrayLength); median = gsl_stats_median_from_sorted_data (inarray, 1, arrayLength); for (i=0; i<arrayLength; i++) { inarray[i] = fabs(inarray[i]-median); } gsl_sort (inarray, 1, arrayLength); mad = 1.486 * gsl_stats_median_from_sorted_data (inarray, 1, arrayLength); PG_RETURN_FLOAT8(mad); }
Datum overlapcoefficient(PG_FUNCTION_ARGS) { char *a, *b; TokenList *s, *t; int atok, btok, comtok, alltok; int mintok; float8 res; a = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(0)))); b = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(1)))); if (strlen(a) > PGS_MAX_STR_LEN || strlen(b) > PGS_MAX_STR_LEN) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("argument exceeds the maximum length of %d bytes", PGS_MAX_STR_LEN))); /* sets */ s = initTokenList(1); t = initTokenList(1); switch (pgs_overlap_tokenizer) { case PGS_UNIT_WORD: tokenizeBySpace(s, a); tokenizeBySpace(t, b); break; case PGS_UNIT_GRAM: tokenizeByGram(s, a); tokenizeByGram(t, b); break; case PGS_UNIT_CAMELCASE: tokenizeByCamelCase(s, a); tokenizeByCamelCase(t, b); break; case PGS_UNIT_ALNUM: /* default */ default: tokenizeByNonAlnum(s, a); tokenizeByNonAlnum(t, b); break; } elog(DEBUG3, "Token List A"); printToken(s); elog(DEBUG3, "Token List B"); printToken(t); atok = s->size; btok = t->size; /* combine the sets */ switch (pgs_overlap_tokenizer) { case PGS_UNIT_WORD: tokenizeBySpace(s, b); break; case PGS_UNIT_GRAM: tokenizeByGram(s, b); break; case PGS_UNIT_CAMELCASE: tokenizeByCamelCase(s, b); break; case PGS_UNIT_ALNUM: /* default */ default: tokenizeByNonAlnum(s, b); break; } elog(DEBUG3, "All Token List"); printToken(s); alltok = s->size; destroyTokenList(s); destroyTokenList(t); comtok = atok + btok - alltok; mintok = min2(atok, btok); elog(DEBUG1, "is normalized: %d", pgs_overlap_is_normalized); elog(DEBUG1, "token list A size: %d", atok); elog(DEBUG1, "token list B size: %d", btok); elog(DEBUG1, "all tokens size: %d", alltok); elog(DEBUG1, "common tokens size: %d", comtok); elog(DEBUG1, "min between A and B sizes: %d", mintok); /* normalized and unnormalized version are the same */ res = (float8) comtok / mintok; PG_RETURN_FLOAT8(res); }
/*
 * Fetch argument 0 with PG_GETARG_DECIBEL and return it unchanged as a float8.
 */
Datum
pascals(PG_FUNCTION_ARGS)
{
	float8		value = PG_GETARG_DECIBEL(0);

	PG_RETURN_FLOAT8(value);
}
/*
 * Return ten times the base-10 logarithm of the float8 argument.
 * NOTE(review): by its name this converts a linear value to decibels - confirm.
 */
Datum
pascaldecibel(PG_FUNCTION_ARGS)
{
	float8		linear = PG_GETARG_FLOAT8(0);
	float8		magnitude = log10(linear);

	PG_RETURN_FLOAT8(10 * magnitude);
}
Datum BOX3D_ymin(PG_FUNCTION_ARGS) { BOX3D *box = (BOX3D *)PG_GETARG_POINTER(0); PG_RETURN_FLOAT8(Min(box->ymin, box->ymax)); }
Datum spherecircle_circ (PG_FUNCTION_ARGS) { SCIRCLE * c = ( SCIRCLE * ) PG_GETARG_POINTER ( 0 ) ; PG_RETURN_FLOAT8 ( PID * ( sin ( c->radius ) ) ); }
Datum BOX3D_zmax(PG_FUNCTION_ARGS) { BOX3D *box = (BOX3D *)PG_GETARG_POINTER(0); PG_RETURN_FLOAT8(Max(box->zmin, box->zmax)); }
Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); /* Oid operator = PG_GETARG_OID(1); */ List *args = (List *) PG_GETARG_POINTER(2); /* int varRelid = PG_GETARG_INT32(3); */ Oid relid; HeapTuple stats_tuple; GEOM_STATS *geomstats; /* * This is to avoid casting the corresponding * "type-punned" pointer, which would break * "strict-aliasing rules". */ GEOM_STATS **gsptr=&geomstats; int geomstats_nvalues=0; Node *other; Var *self; GBOX search_box; float8 selectivity=0; POSTGIS_DEBUG(2, "geometry_gist_sel called"); /* Fail if not a binary opclause (probably shouldn't happen) */ if (list_length(args) != 2) { POSTGIS_DEBUG(3, "geometry_gist_sel: not a binary opclause"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } /* * Find the constant part */ other = (Node *) linitial(args); if ( ! IsA(other, Const) ) { self = (Var *)other; other = (Node *) lsecond(args); } else { self = (Var *) lsecond(args); } if ( ! IsA(other, Const) ) { POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } /* * We are working on two constants.. * TODO: check if expression is true, * returned set would be either * the whole or none. */ if ( ! IsA(self, Var) ) { POSTGIS_DEBUG(3, " no variable argument ? - returning default selectivity"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } /* * Convert the constant to a BOX */ if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) ) { POSTGIS_DEBUG(3, "search box is EMPTY"); PG_RETURN_FLOAT8(0.0); } POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax); /* * Get pg_statistic row */ relid = getrelid(self->varno, root->parse->rtable); stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0); if ( ! 
stats_tuple ) { POSTGIS_DEBUG(3, " No statistics, returning default estimate"); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL, #if POSTGIS_PGSQL_VERSION >= 85 NULL, #endif (float4 **)gsptr, &geomstats_nvalues) ) { POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity"); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); } POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues); POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f", geomstats->xmin, geomstats->ymin); POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f", geomstats->xmax, geomstats->ymax); POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->rows); POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->cols); POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea); POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells); /* * Do the estimation */ selectivity = estimate_selectivity(&search_box, geomstats); POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity); free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues); ReleaseSysCache(stats_tuple); PG_RETURN_FLOAT8(selectivity); }