Exemplo n.º 1
0
/*!
 * Look up the approximate number of occurrences of a value in a
 * countmin sketch.
 *
 * The value is first reduced to an md5 digest via sketch_md5_bytea();
 * the digest is then handed to cmsketch_count_md5_datum() for the lookup.
 *
 * \param sketch a countmin sketch
 * \param arg the Datum we want to find the count of
 * \param funcOid the Postgres function that converts arg to a string
 * \param typOid the type Oid passed along with arg when computing its md5
 * \return the value reported by cmsketch_count_md5_datum() for arg's digest
 */
int64 cmsketch_count_c(countmin sketch, Datum arg, Oid funcOid, Oid typOid)
{
    /* md5 digest of the argument; this is the key probed in the sketch */
    bytea *digest = sketch_md5_bytea(arg, typOid);

    return cmsketch_count_md5_datum(sketch, digest, funcOid);
}
Exemplo n.º 2
0
/*!
 *  transition function to maintain a CountMin sketch with
 *  Most-Frequent Values
 */
Datum __mfvsketch_trans(PG_FUNCTION_ARGS)
{
    bytea *      transblob = PG_GETARG_BYTEA_P(0);
    Datum        newdatum  = PG_GETARG_DATUM(1);
    int          max_mfvs  = PG_GETARG_INT32(2);
    mfvtransval *transval;
    uint64       tmpcnt;
    int          i;
    Datum        md5_datum;

    /*
     * This function makes destructive updates to its arguments.
     * Make sure it's being called in an agg context.
     */
    if (!(fcinfo->context &&
          (IsA(fcinfo->context, AggState)
   #ifdef NOTGP
           || IsA(fcinfo->context, WindowAggState)
   #endif
          )))
        elog(ERROR,
             "destructive pass by reference outside agg");

    /* initialize if this is first call */
    if (VARSIZE(transblob) <= sizeof(MFV_TRANSVAL_SZ(0))) {
        Oid typOid = get_fn_expr_argtype(fcinfo->flinfo, 1);
        transblob = mfv_init_transval(max_mfvs, typOid);
    }
    else {
        check_mfvtransval(transblob);
    }

    /* ignore NULL inputs */
    if (PG_ARGISNULL(1) || PG_ARGISNULL(2))
        PG_RETURN_DATUM(PointerGetDatum(transblob));

    transval = (mfvtransval *)VARDATA(transblob);
    if (transval->typOid != get_fn_expr_argtype(fcinfo->flinfo, 1)) {
        elog(ERROR, "cannot aggregate on elements with different types");
    }
    /* insert into the countmin sketch */
    md5_datum = countmin_trans_c(transval->sketch,
                                newdatum,
                                transval->outFuncOid,
                                transval->typOid);

    tmpcnt = cmsketch_count_md5_datum(transval->sketch,
                                      (bytea *)DatumGetPointer(md5_datum),
                                      transval->outFuncOid);
    i = mfv_find(transblob, newdatum);

    if (i > -1) {
        transval->mfvs[i].cnt = tmpcnt;
    }
    else {
        /* try to insert as either a new or replacement entry */
        for (i = 0; i < (int)transval->max_mfvs; i++) {
            if ((i == (int)transval->next_mfv)) {
                /* room for new */
                transblob = mfv_transval_append(transblob, newdatum);
                transval = (mfvtransval *)VARDATA(transblob);
                transval->mfvs[i].cnt = tmpcnt;
                break;
            }
            else if (transval->mfvs[i].cnt < tmpcnt) {
                /* arg beats this mfv */
                transblob = mfv_transval_replace(transblob, newdatum, i);
                transval = (mfvtransval *)VARDATA(transblob);
                transval->mfvs[i].cnt = tmpcnt;
                break;
            }
            /* else this is not a frequent value */
        }
    }
    PG_RETURN_DATUM(PointerGetDatum(transblob));
}