Exemplo n.º 1
0
/*
Copy one event of the old db into the new cons.

A fresh event record is appended to cons->events and chained to the
event previously stored behind `uuid_ptr`. Each item of `event` is then
re-encoded and appended to cons->items: a zero value stays zero, any
other value is replaced by lexicon_maps[field - 1][val - 1].

NOTE(review): the pointers returned by arena_add_item() are used without
a NULL check here - confirm whether these calls can fail.
*/
static void append_event(tdb_cons *cons,
                         const tdb_event *event,
                         Word_t *uuid_ptr,
                         tdb_val **lexicon_maps)
{
    uint64_t idx;
    struct tdb_cons_event *dst_event =
        (struct tdb_cons_event*)arena_add_item(&cons->events);

    /* item_zero records cons->items.next before any item is appended */
    dst_event->item_zero = cons->items.next;
    dst_event->num_items = 0;
    dst_event->timestamp = event->timestamp;

    /* remember the old chain head, then store cons->events.next as the
       new one */
    dst_event->prev_event_idx = *uuid_ptr;
    *uuid_ptr = cons->events.next;

    for (idx = 0; idx < event->num_items; idx++){
        const tdb_field field = tdb_item_field(event->items[idx]);
        const tdb_val old_val = tdb_item_val(event->items[idx]);
        tdb_val mapped = 0;
        tdb_item translated;

        /* only non-zero values go through the lexicon map */
        if (old_val)
            mapped = lexicon_maps[field - 1][old_val - 1];

        translated = tdb_make_item(field, mapped);
        memcpy(arena_add_item(&cons->items), &translated, sizeof(tdb_item));
        dst_event->num_items++;
    }
}
Exemplo n.º 2
0
/*
Append an event in this cons.

cons          - constructor handle that receives the event
uuid          - 16-byte identifier of the trail the event belongs to
timestamp     - event timestamp; cons->min_timestamp is lowered to it
                when it is smaller
values        - one value per field, cons->num_ofields entries
value_lengths - length of each value; a zero length stores value 0 for
                that field

Returns 0 on success, TDB_ERR_VALUE_TOO_LONG if any length exceeds
TDB_MAX_VALUE_SIZE (checked before anything is modified), or
TDB_ERR_NOMEM if the event, a lexicon entry or an item cannot be
allocated.

Note: if an allocation fails inside the item loop, the event has already
been appended and linked with the items added so far.
*/
TDB_EXPORT tdb_error tdb_cons_add(tdb_cons *cons,
                                  const uint8_t uuid[16],
                                  const uint64_t timestamp,
                                  const char **values,
                                  const uint64_t *value_lengths)
{
    tdb_field i;
    struct tdb_cons_event *event;
    Word_t *uuid_ptr;
    __uint128_t uuid_key;

    for (i = 0; i < cons->num_ofields; i++)
        if (value_lengths[i] > TDB_MAX_VALUE_SIZE)
            return TDB_ERR_VALUE_TOO_LONG;

    /*
    Allocate the event before touching the trail map. If the uuid were
    inserted first and this allocation failed, a new uuid would stay in
    cons->trails with a zero value, i.e. a trail without any event.
    */
    if (!(event = (struct tdb_cons_event*)arena_add_item(&cons->events)))
        return TDB_ERR_NOMEM;

    /* NOTE(review): the result of j128m_insert() is dereferenced
       unchecked - confirm that it cannot return NULL */
    memcpy(&uuid_key, uuid, 16);
    uuid_ptr = j128m_insert(&cons->trails, uuid_key);

    event->item_zero = cons->items.next;
    event->num_items = 0;
    event->timestamp = timestamp;
    /* chain to the previous event of this uuid; zero when there is none */
    event->prev_event_idx = *uuid_ptr;
    *uuid_ptr = cons->events.next;

    if (timestamp < cons->min_timestamp)
        cons->min_timestamp = timestamp;

    for (i = 0; i < cons->num_ofields; i++){
        /* item fields are numbered from 1 */
        tdb_field field = (tdb_field)(i + 1);
        tdb_val val = 0;
        tdb_item item;
        void *dst;

        /* TODO add a test for sparse trails */
        if (value_lengths[i]){
            /* a zero return from jsm_insert() is treated as out of memory */
            if (!(val = (tdb_val)jsm_insert(&cons->lexicons[i],
                                            values[i],
                                            value_lengths[i])))
                return TDB_ERR_NOMEM;

        }
        item = tdb_make_item(field, val);
        if (!(dst = arena_add_item(&cons->items)))
            return TDB_ERR_NOMEM;
        memcpy(dst, &item, sizeof(tdb_item));
        ++event->num_items;
    }
    return 0;
}
Exemplo n.º 3
0
/*
Prepare `cursor` for decoding the trail `trail_id`.

Returns 0 on success. If `trail_id` is not smaller than db->num_trails,
the cursor is left with no events and a NULL next_event, and
TDB_ERR_INVALID_TRAIL_ID is returned.
*/
TDB_EXPORT tdb_error tdb_get_trail(tdb_cursor *cursor,
                                   uint64_t trail_id)
{
    struct tdb_decode_state *state = cursor->state;
    const tdb *db = state->db;
    uint64_t num_bytes;
    tdb_field f;

    /* reject unknown trails first */
    if (trail_id >= db->num_trails){
        cursor->num_events_left = 0;
        cursor->next_event = NULL;
        return TDB_ERR_INVALID_TRAIL_ID;
    }

    /*
    edge encoding: a field may be inherited from an earlier event, so the
    decoder keeps the last item seen for each field. Every field from 1
    upwards starts out with value 0.
    */
    f = 1;
    while (f < db->num_fields){
        state->previous_items[f] = tdb_make_item(f, 0);
        f++;
    }

    /* the trail occupies the bytes between its own offset and the offset
       of the following trail */
    state->data = &db->trails.data[tdb_get_trail_offs(db, trail_id)];
    num_bytes = tdb_get_trail_offs(db, trail_id + 1) -
                tdb_get_trail_offs(db, trail_id);

    /* size is counted in bits, minus the number stored in the first three
       bits of the trail; decoding resumes right after those three bits */
    state->size = 8 * num_bytes - read_bits(state->data, 0, 3);
    state->offset = 3;
    state->tstamp = db->min_timestamp;
    state->trail_id = trail_id;

    /* nothing decoded yet: the cursor points at the start of the buffer */
    cursor->num_events_left = 0;
    cursor->next_event = state->events_buffer;

    return 0;
}
Exemplo n.º 4
0
/*
Handle one trail: collect its events, sort them by time, delta-encode
the timestamps and write the result to s->grouped_w.

uuid  - key of the trail (unused)
value - points to the 1-based index of the last event of the trail
state - struct jm_fold_state shared between invocations

Always returns `state`. The outcome is stored in s->ret: 0 on success or
a TDB_ERR_* code. Once s->ret is non-zero, further invocations return
immediately without doing any work.

NOTE(review): assumes *value is non-zero; a zero value would index
s->events out of bounds - confirm that callers guarantee this.
*/
static void *groupby_uuid_handle_one_trail(
    __uint128_t uuid __attribute__((unused)),
    Word_t *value,
    void *state)
{
    struct jm_fold_state *s = (struct jm_fold_state*)state;
    const struct tdb_cons_event *ev;
    uint64_t j = 0;
    uint64_t num_events = 0;
    uint64_t prev_timestamp;
    int ret = 0;

    /* an earlier trail already failed: keep that error and do nothing */
    if (s->ret)
        return s;

    /* find the last event belonging to this trail */
    ev = &s->events[*value - 1];

    /* loop through all events belonging to this trail,
       following back-links */
    while (1){
        if (j >= s->buf_size){
            /*
            Grow through a temporary: if realloc() fails the old buffer
            is still owned by s->buf and s->buf_size still matches it.
            */
            void *grown = realloc(s->buf,
                    (s->buf_size + GROUPBUF_INCREMENT) *
                    sizeof(struct tdb_grouped_event));
            if (!grown){
                ret = TDB_ERR_NOMEM;
                goto done;
            }
            s->buf = grown;
            s->buf_size += GROUPBUF_INCREMENT;
        }
        s->buf[j].trail_id = s->trail_id;
        s->buf[j].item_zero = ev->item_zero;
        s->buf[j].num_items = ev->num_items;
        s->buf[j].timestamp = ev->timestamp;

        /* TODO write a test for an extra long (>2^32) trail */
        if (++j == TDB_MAX_TRAIL_LENGTH){
            ret = TDB_ERR_TRAIL_TOO_LONG;
            goto done;
        }

        /* a zero back-link marks the first event of the trail */
        if (ev->prev_event_idx)
            ev = &s->events[ev->prev_event_idx - 1];
        else
            break;
    }
    num_events = j;

    /* sort events of this trail by time */
    /* TODO make this stable sort */
    /* TODO this could really benefit from Timsort since raw data
       is often partially sorted */
    qsort(s->buf, num_events, sizeof(struct tdb_grouped_event), compare);

    /* delta-encode timestamps: the first delta is relative to
       s->min_timestamp, later ones to the preceding event */
    prev_timestamp = s->min_timestamp;
    for (j = 0; j < num_events; j++){
        uint64_t timestamp = s->buf[j].timestamp;
        uint64_t delta = timestamp - prev_timestamp;
        if (delta < TDB_MAX_TIMEDELTA){
            if (timestamp > s->max_timestamp)
                s->max_timestamp = timestamp;
            if (delta > s->max_timedelta)
                s->max_timedelta = delta;
            prev_timestamp = timestamp;
            /* convert the delta value to a proper item */
            s->buf[j].timestamp = tdb_make_item(0, delta);
        }else{
            ret = TDB_ERR_TIMESTAMP_TOO_LARGE;
            goto done;
        }
    }

    /* NOTE(review): TDB_WRITE is defined elsewhere; presumably it sets
       `ret` and jumps to `done` on failure - confirm */
    TDB_WRITE(s->grouped_w,
              s->buf,
              num_events * sizeof(struct tdb_grouped_event));
    ++s->trail_id;

done:
    s->ret = ret;
    return s;
}