/*!
 * Look up the approximate number of occurrences of a value in a
 * CountMin sketch.
 *
 * \param sketch a countmin sketch
 * \param arg the Datum whose count is wanted
 * \param funcOid the Postgres function that converts arg to a string
 * \param typOid type OID of arg; forwarded to sketch_md5_bytea together
 *        with arg when computing the hash
 * \return whatever cmsketch_count_md5_datum reports for the md5 hash of arg
 */
int64 cmsketch_count_c(countmin sketch, Datum arg, Oid funcOid, Oid typOid)
{
    /* hash the argument with md5, then query the sketch with that hash */
    bytea *arg_md5 = sketch_md5_bytea(arg, typOid);

    return cmsketch_count_md5_datum(sketch, arg_md5, funcOid);
}
/*! * transition function to maintain a CountMin sketch with * Most-Frequent Values */ Datum __mfvsketch_trans(PG_FUNCTION_ARGS) { bytea * transblob = PG_GETARG_BYTEA_P(0); Datum newdatum = PG_GETARG_DATUM(1); int max_mfvs = PG_GETARG_INT32(2); mfvtransval *transval; uint64 tmpcnt; int i; Datum md5_datum; /* * This function makes destructive updates to its arguments. * Make sure it's being called in an agg context. */ if (!(fcinfo->context && (IsA(fcinfo->context, AggState) #ifdef NOTGP || IsA(fcinfo->context, WindowAggState) #endif ))) elog(ERROR, "destructive pass by reference outside agg"); /* initialize if this is first call */ if (VARSIZE(transblob) <= sizeof(MFV_TRANSVAL_SZ(0))) { Oid typOid = get_fn_expr_argtype(fcinfo->flinfo, 1); transblob = mfv_init_transval(max_mfvs, typOid); } else { check_mfvtransval(transblob); } /* ignore NULL inputs */ if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) PG_RETURN_DATUM(PointerGetDatum(transblob)); transval = (mfvtransval *)VARDATA(transblob); if (transval->typOid != get_fn_expr_argtype(fcinfo->flinfo, 1)) { elog(ERROR, "cannot aggregate on elements with different types"); } /* insert into the countmin sketch */ md5_datum = countmin_trans_c(transval->sketch, newdatum, transval->outFuncOid, transval->typOid); tmpcnt = cmsketch_count_md5_datum(transval->sketch, (bytea *)DatumGetPointer(md5_datum), transval->outFuncOid); i = mfv_find(transblob, newdatum); if (i > -1) { transval->mfvs[i].cnt = tmpcnt; } else { /* try to insert as either a new or replacement entry */ for (i = 0; i < (int)transval->max_mfvs; i++) { if ((i == (int)transval->next_mfv)) { /* room for new */ transblob = mfv_transval_append(transblob, newdatum); transval = (mfvtransval *)VARDATA(transblob); transval->mfvs[i].cnt = tmpcnt; break; } else if (transval->mfvs[i].cnt < tmpcnt) { /* arg beats this mfv */ transblob = mfv_transval_replace(transblob, newdatum, i); transval = (mfvtransval *)VARDATA(transblob); transval->mfvs[i].cnt = tmpcnt; break; } /* else this is 
not a frequent value */ } } PG_RETURN_DATUM(PointerGetDatum(transblob)); }