Exemple #1
0
/*!
 * Overwrite the stored value at position <c>index</c> of the mfv sketch
 * held in <c>transblob</c> with the bytes of datum <c>dat</c>.
 *
 * <i>Caller beware: no bounds check is performed here.  The caller must
 * guarantee that <c>dat</c> fits in the storage currently occupied by
 * the value at position <c>index</c>.</i>
 *
 * \param transblob a bytea holding an mfv transval
 * \param index the position of the value to overwrite
 * \param dat the datum whose bytes are copied into the transval
 */
void mfv_copy_datum(bytea *transblob, int index, Datum dat)
{
    mfvtransval *state = (mfvtransval *)VARDATA(transblob);
    const void  *src;
    void        *dst;
    size_t       nbytes;

    /* byte length of the incoming datum, per the type info kept in the transval */
    nbytes = ExtractDatumLen(dat, state->typLen, state->typByVal, -1);

    /* destination: the storage currently holding the value at this position */
    dst = mfv_transval_getval(transblob, index);
    src = (void *)DatumExtractPointer(dat, state->typByVal);

    memmove(dst, src, nbytes);
}
Exemple #2
0
/*!
 * Replace the value at position <c>i</c> of the mfv sketch with
 * <c>dat</c>.
 *
 * \param transblob the transition value packed into a bytea
 * \param dat the value to be stored
 * \param i the position to replace
 * \return <c>transblob</c> itself when the value was overwritten in place,
 *         otherwise whatever <c>mfv_transval_insert_at</c> returns
 */
bytea *mfv_transval_replace(bytea *transblob, Datum dat, int i)
{
    mfvtransval *state = (mfvtransval *)VARDATA(transblob);
    size_t       newLen;
    size_t       curLen;
    void        *slot;
    Datum        curDat;

    newLen = ExtractDatumLen(dat, state->typLen, state->typByVal, -1);
    slot   = mfv_transval_getval(transblob, i);
    curDat = PointerExtractDatum(slot, state->typByVal);
    curLen = ExtractDatumLen(curDat, state->typLen, state->typByVal, -1);

    /*
     * A strictly larger value cannot reuse the old storage, so it is handed
     * to mfv_transval_insert_at, which is expected to take care of space
     * allocation for the new value.
     */
    if (newLen > curLen) {
        return mfv_transval_insert_at(transblob, dat, i);
    }

    /* the new value is no longer than the old one: overwrite at the old offset */
    mfv_copy_datum(transblob, i, dat);
    return transblob;
}
Exemple #3
0
/*!
 * Look to see if the mfv sketch currently has <c>val</c> stored as one
 * of its most-frequent values.  Two values match when they have the same
 * byte length and identical bytes.
 *
 * NOTE: a 0 return value means the item <i>was found</i> at offset 0!
 *
 * \param blob a bytea holding an mfv transval
 * \param val the datum to search for
 * \return the offset of the match in the <c>mfvs</c> array, or -1 if
 *         <c>val</c> is not present
 */
int mfv_find(bytea *blob, Datum val)
{
    mfvtransval *transval = (mfvtransval *)VARDATA(blob);
    void        *valp = DatumExtractPointer(val, transval->typByVal);
    size_t       valLen;
    unsigned     i;

    /* nothing stored yet: nothing to compare against */
    if (transval->next_mfv == 0)
        return -1;

    /* the length of the search key does not change, so compute it once */
    valLen = ExtractDatumLen(val, transval->typLen, transval->typByVal, -1);

    /* look for an existing entry for this value */
    for (i = 0; i < transval->next_mfv; i++) {
        void  *datp = mfv_transval_getval(blob, i);
        Datum  iDat = PointerExtractDatum(datp, transval->typByVal);
        size_t len  = ExtractDatumLen(iDat, transval->typLen,
                                      transval->typByVal, -1);

        /* same length and same bytes: arg is an mfv */
        if (len == valLen && memcmp(datp, valp, len) == 0)
            return (int) i;
    }
    return -1;
}
Exemple #4
0
/*!
 * Implementation of the merge of two mfv sketches.  We first add the
 * embedded countmin sketches cell by cell to get the sums of the counts,
 * then re-estimate the count of every stored value against the summed
 * sketch, and finally pick the top values for the resulting histogram.
 *
 * The result is built in a freshly initialized transval.  Both inputs are
 * modified along the way: their per-value counts are overwritten with the
 * merged estimates and their <c>mfvs</c> arrays are sorted by descending
 * count.
 *
 * NOTE(review): a value stored in both inputs looks like it is appended
 * twice to the result, since no lookup is done before appending -- confirm
 * whether de-duplication is wanted.
 *
 * \param transblob1 an mfv transval stored inside a bytea
 * \param transblob2 another mfv transval in a bytea
 * \return the merged transval, or <c>transblob1</c> unchanged when both
 *         inputs are uninitialized
 */
bytea *mfvsketch_merge_c(bytea *transblob1, bytea *transblob2)
{
    mfvtransval *transval1 = (mfvtransval *)VARDATA(transblob1);
    mfvtransval *transval2 = (mfvtransval *)VARDATA(transblob2);
    bytea       *newblob;
    mfvtransval *newval;
    uint32       i, j, cnt;

    /*
     * A blob shorter than an empty transval has never been initialized.
     * Compare against the size itself; sizeof(MFV_TRANSVAL_SZ(0)) would
     * only be the width of the size expression's integer type.
     */
    bool         uninit1 = VARSIZE(transblob1) < MFV_TRANSVAL_SZ(0);
    bool         uninit2 = VARSIZE(transblob2) < MFV_TRANSVAL_SZ(0);

    /* handle uninitialized args */
    if (uninit1 && uninit2)
        return transblob1;
    else if (uninit1) {
        /* stand in an empty transval shaped like the initialized one */
        transblob1 = mfv_init_transval(transval2->max_mfvs, transval2->typOid);
        transval1 = (mfvtransval *)VARDATA(transblob1);
    }
    else if (uninit2) {
        transblob2 = mfv_init_transval(transval1->max_mfvs, transval1->typOid);
        transval2 = (mfvtransval *)VARDATA(transblob2);
    }
    check_mfvtransval(transblob1);
    check_mfvtransval(transblob2);

    if (transval1->typOid != transval2->typOid) {
        elog(ERROR, "cannot merge two transition state with different element type");
    }

    /* initialize output */
    newblob = mfv_init_transval(transval1->max_mfvs, transval1->typOid);
    newval  = (mfvtransval *)VARDATA(newblob);

    /* combine sketches */
    for (i = 0; i < DEPTH; i++)
        for (j = 0; j < NUMCOUNTERS; j++)
            newval->sketch[i][j] = transval1->sketch[i][j]
                                   + transval2->sketch[i][j];

    /* recompute the counts using the merged sketch */
    for (i = 0; i < transval1->next_mfv; i++) {
        void *tmpp = mfv_transval_getval(transblob1, i);
        Datum dat = PointerExtractDatum(tmpp, transval1->typByVal);

        transval1->mfvs[i].cnt = cmsketch_count_c(newval->sketch,
                                                  dat,
                                                  newval->outFuncOid,
                                                  newval->typOid);
    }
    for (i = 0; i < transval2->next_mfv; i++) {
        void *tmpp = mfv_transval_getval(transblob2, i);
        Datum dat = PointerExtractDatum(tmpp, transval2->typByVal);

        transval2->mfvs[i].cnt = cmsketch_count_c(newval->sketch,
                                                  dat,
                                                  newval->outFuncOid,
                                                  newval->typOid);
    }

    /* sort both inputs by descending count so they can be merged in order */
    qsort(transval1->mfvs, transval1->next_mfv, sizeof(offsetcnt), cnt_cmp_desc);
    qsort(transval2->mfvs, transval2->next_mfv, sizeof(offsetcnt), cnt_cmp_desc);

    /*
     * Choose the top values from transval1 and transval2 in a sort-merge
     * style, appending each pick to the new transval.  i and j walk the two
     * sorted inputs; cnt is the number of entries written so far.
     */
    for (i = j = cnt = 0;
         cnt < newval->max_mfvs
         && (j < transval2->next_mfv || i < transval1->next_mfv);
         cnt++) {
        Datum iDatum, jDatum;

        if (i < transval1->next_mfv &&
            (j == transval2->next_mfv
             || transval1->mfvs[i].cnt >= transval2->mfvs[j].cnt)) {
            /* next item comes from transval1 */
            iDatum = PointerExtractDatum(mfv_transval_getval(transblob1, i),
                                         transval1->typByVal);
            newblob = mfv_transval_append(newblob, iDatum);
            /* the append hands back the blob to use from now on; re-derive */
            newval = (mfvtransval *)VARDATA(newblob);
            newval->mfvs[cnt].cnt = transval1->mfvs[i].cnt;
            i++;
        }
        else if (j < transval2->next_mfv &&
                 (i == transval1->next_mfv
                  || transval1->mfvs[i].cnt < transval2->mfvs[j].cnt)) {
            /* next item comes from transval2 */
            jDatum = PointerExtractDatum(mfv_transval_getval(transblob2, j),
                                         transval2->typByVal);
            newblob = mfv_transval_append(newblob, jDatum);
            newval = (mfvtransval *)VARDATA(newblob);
            newval->mfvs[cnt].cnt = transval2->mfvs[j].cnt;
            j++;
        }
    }
    return newblob;
}
Exemple #5
0
/*!
 * scalar function taking an mfv sketch, returning a histogram of
 * its most frequent values
 */
Datum __mfvsketch_final(PG_FUNCTION_ARGS)
{
    bytea *      transblob = PG_GETARG_BYTEA_P(0);
    mfvtransval *transval = NULL;
    ArrayType *  retval;
    uint32       i;
    int          dims[2], lbs[2];
    /* Oid     typInput, typIOParam; */
    Oid          outFuncOid;
    bool         typIsVarlena;
    int16        typlen;
    bool         typbyval;
    char         typalign;
    char         typdelim;
    Oid          typioparam;
    Oid          typiofunc;


    if (PG_ARGISNULL(0)) PG_RETURN_NULL();
    if (VARSIZE(transblob) < MFV_TRANSVAL_SZ(0)) PG_RETURN_NULL();

    check_mfvtransval(transblob);
    transval = (mfvtransval *)VARDATA(transblob);
    /*
     * We only declare the variable-length array histo here after some sanity
     * checking. We risk a stack overflow otherwise. In particular, we need to
     * make sure that transval->max_mfvs is initialized. It might not be if the
     * (strict) transition function is never called. (MADLIB-254)
     */
    Datum        histo[transval->max_mfvs][2];

    qsort(transval->mfvs, transval->next_mfv, sizeof(offsetcnt), cnt_cmp_desc);
    getTypeOutputInfo(INT8OID,
                      &outFuncOid,
                      &typIsVarlena);

    for (i = 0; i < transval->next_mfv; i++) {
        void *tmpp = mfv_transval_getval(transblob,i);
        Datum curval = PointerExtractDatum(tmpp, transval->typByVal);
        char *countbuf =
            OidOutputFunctionCall(outFuncOid,
                                  Int64GetDatum(transval->mfvs[i].cnt));
        char *valbuf = OidOutputFunctionCall(transval->outFuncOid, curval);

        histo[i][0] = PointerGetDatum(cstring_to_text(valbuf));
        histo[i][1] = PointerGetDatum(cstring_to_text(countbuf));
        pfree(countbuf);
        pfree(valbuf);
    }

    /*
     * Get info about element type
     */
    get_type_io_data(TEXTOID, IOFunc_output,
                     &typlen, &typbyval,
                     &typalign, &typdelim,
                     &typioparam, &typiofunc);

    dims[0] = i;
    dims[1] = 2;
    lbs[0] = lbs[1] = 0;
    retval = construct_md_array((Datum *)histo,
                                NULL,
                                2,
                                dims,
                                lbs,
                                TEXTOID,
                                -1,
                                0,
                                'i');
    PG_RETURN_ARRAYTYPE_P(retval);
}