Example 1
0
/*
Copy a single event from the old db into the cons being built,
remapping each item's value through the per-field lexicon maps.
*/
static void append_event(tdb_cons *cons,
                         const tdb_event *event,
                         Word_t *uuid_ptr,
                         tdb_val **lexicon_maps)
{
    struct tdb_cons_event *dst =
        (struct tdb_cons_event*)arena_add_item(&cons->events);
    uint64_t j;

    /* record where this event's items will start and link the event
       into the per-uuid chain (uuid_ptr holds the previous head) */
    dst->item_zero = cons->items.next;
    dst->num_items = 0;
    dst->timestamp = event->timestamp;
    dst->prev_event_idx = *uuid_ptr;
    *uuid_ptr = cons->events.next;

    for (j = 0; j < event->num_items; j++){
        const tdb_item old_item = event->items[j];
        const tdb_field field = tdb_item_field(old_item);
        const tdb_val old_val = tdb_item_val(old_item);
        /* val 0 denotes the empty value and is never remapped */
        tdb_item translated = tdb_make_item(
            field,
            old_val ? lexicon_maps[field - 1][old_val - 1] : 0);
        memcpy(arena_add_item(&cons->items), &translated, sizeof(tdb_item));
        dst->num_items++;
    }
}
Example 2
0
/*
This function adds events from db to cons one by one, using the
public API. We need to use this with filtered dbs or otherwise when
we need to re-create lexicons.

Returns 0 on success, or a tdb_error code (e.g. TDB_ERR_NOMEM) on
failure.
*/
static tdb_error tdb_cons_append_subset_lexicon(tdb_cons *cons, const tdb *db)
{
    const char **values = NULL;
    uint64_t *lengths = NULL;
    uint64_t i, trail_id;
    int ret = 0;
    const uint64_t num_fields = tdb_num_fields(db);

    tdb_cursor *cursor = tdb_cursor_new(db);
    if (!cursor)
        return TDB_ERR_NOMEM;

    /*
    calloc, not malloc: with TDB_OPT_ONLY_DIFF_ITEMS a field may never
    occur in any item of a trail, so its slot in `values` would stay
    indeterminate and get passed to tdb_cons_add below. Zeroing the
    array makes such untouched slots well-defined NULLs (their lengths
    are 0, so the pointers are not dereferenced).
    */
    if (!(values = calloc(num_fields, sizeof(char*)))){
        ret = TDB_ERR_NOMEM;
        goto done;
    }

    if (!(lengths = malloc(num_fields * sizeof(uint64_t)))){
        ret = TDB_ERR_NOMEM;
        goto done;
    }

    for (trail_id = 0; trail_id < tdb_num_trails(db); trail_id++){
        const tdb_event *event;
        const uint8_t *uuid = tdb_get_uuid(db, trail_id);

        if ((ret = tdb_get_trail(cursor, trail_id)))
            goto done;

        while ((event = tdb_cursor_next(cursor))){
            /*
            with TDB_OPT_ONLY_DIFF_ITEMS event->items may be sparse,
            hence we need to reset lengths to zero
            */
            memset(lengths, 0, num_fields * sizeof(uint64_t));
            for (i = 0; i < event->num_items; i++){
                tdb_field field = tdb_item_field(event->items[i]);
                tdb_val val = tdb_item_val(event->items[i]);
                values[field - 1] = tdb_get_value(db,
                                                  field,
                                                  val,
                                                  &lengths[field - 1]);
            }

            if ((ret = tdb_cons_add(cons,
                                    uuid,
                                    event->timestamp,
                                    values,
                                    lengths)))
                goto done;
        }
    }

done:
    free(values);
    free(lengths);
    tdb_cursor_free(cursor);
    return ret;
}
Example 3
0
/*
Decode the next batch of Huffman-coded events from the cursor's
current trail into s->events_buffer, stopping when the trail data is
exhausted or the buffer fills up. Events that do not satisfy the
cursor's filter (if any) are dropped by rewinding the write index.
Returns 1 if at least one event was decoded, 0 otherwise.
*/
TDB_EXPORT int _tdb_cursor_next_batch(tdb_cursor *cursor)
{
    struct tdb_decode_state *s = cursor->state;
    const struct huff_codebook *codebook =
        (const struct huff_codebook*)s->db->codebook.data;
    const struct field_stats *fstats = s->db->field_stats;
    uint64_t *dst = (uint64_t*)s->events_buffer;
    uint64_t i = 0;
    uint64_t num_events = 0;
    tdb_field field;
    tdb_item item;
    const int edge_encoded = s->edge_encoded;

    /* decode the trail - exit early if destination buffer runs out of space */
    while (s->offset < s->size && num_events < s->events_buffer_len){
        /* Every event starts with a timestamp.
           Timestamp may be the first member of a bigram */
        __uint128_t gram = huff_decode_value(codebook,
                                             s->data,
                                             &s->offset,
                                             fstats);
        uint64_t orig_i = i;
        /* timestamps are delta-encoded against the previous event */
        uint64_t delta = tdb_item_val(HUFF_BIGRAM_TO_ITEM(gram));
        uint64_t *num_items;

        /*
        events buffer format:

           [ [ timestamp | num_items | items ... ] tdb_event 1
             [ timestamp | num_items | items ... ] tdb_event 2
             ...
             [ timestamp | num_items | items ... ] tdb_event N ]

        note that events may have a varying number of items, due to
        edge encoding
        */

        s->tstamp += delta;
        dst[i++] = s->tstamp;
        /* num_items is filled in after the event's items are known */
        num_items = &dst[i++];

        item = HUFF_BIGRAM_OTHER_ITEM(gram);

        /* handle a possible latter part of the first bigram */
        if (item){
            field = tdb_item_field(item);
            s->previous_items[field] = item;
            if (edge_encoded)
                dst[i++] = item;
        }

        /* decode one event: timestamp is followed by at most num_ofields
           field values */
        while (s->offset < s->size){
            /* remember the offset so we can rewind if this gram turns
               out to belong to the next event */
            uint64_t prev_offs = s->offset;
            gram = huff_decode_value(codebook,
                                     s->data,
                                     &s->offset,
                                     fstats);
            item = HUFF_BIGRAM_TO_ITEM(gram);
            field = tdb_item_field(item);
            if (field){
                /* value may be either a unigram or a bigram */
                do{
                    s->previous_items[field] = item;
                    if (edge_encoded)
                        dst[i++] = item;
                    gram = item = HUFF_BIGRAM_OTHER_ITEM(gram);
                }while ((field = tdb_item_field(item)));
            }else{
                /* we hit the next timestamp, take a step back and break */
                s->offset = prev_offs;
                break;
            }
        }

        if (!s->filter || event_satisfies_filter(s->previous_items,
                                                 s->filter,
                                                 s->filter_len)){

            /* no filter or filter matches, finalize the event */
            if (!edge_encoded){
                /* dump all the fields of this event in the result, if edge
                   encoding is not requested
                */
                for (field = 1; field < s->db->num_fields; field++)
                    dst[i++] = s->previous_items[field];
            }
            ++num_events;
            /* items written after the timestamp and num_items slots */
            *num_items = (i - (orig_i + 2));
        }else{
            /* filter doesn't match - ignore this event */
            i = orig_i;
        }
    }

    cursor->next_event = s->events_buffer;
    cursor->num_events_left = num_events;
    return num_events > 0 ? 1: 0;
}