Esempio n. 1
0
/*
 * UNION: append the rows of b onto a, in place.
 *
 * The row counts of both tables are sampled first, then the length of a's
 * column 0 vector is reset to 0. Columns are walked from index 1 for as long
 * as both tables have a named column. When a column is bound on only one
 * side, the other side is marked bound and padded with FS_RID_NULL up to its
 * own sampled row count, so the values stay row-aligned once b's vector is
 * appended to a's. Note that b can be modified by this (bound flag and
 * padding). The q argument is not used.
 */
void fs_binding_union(fs_query *q, fs_binding *a, fs_binding *b)
{
    const int rows_a = fs_binding_length(a);
    const int rows_b = fs_binding_length(b);

    a[0].vals->length = 0;

    int col = 1;
    while (a[col].name && b[col].name) {
        fs_binding *dst = &a[col];
        fs_binding *src = &b[col];

        if (src->bound && !dst->bound) {
            /* only b binds this column: give a's existing rows NULLs */
            dst->bound = 1;
            while (dst->vals->length < rows_a) {
                fs_rid_vector_append(dst->vals, FS_RID_NULL);
            }
        } else if (dst->bound && !src->bound) {
            /* only a binds this column: give b's rows NULLs */
            src->bound = 1;
            while (src->vals->length < rows_b) {
                fs_rid_vector_append(src->vals, FS_RID_NULL);
            }
        }

        fs_rid_vector_append_vector(dst->vals, src->vals);
        col++;
    }
}
Esempio n. 2
0
/*
 * Return a new binding table holding the rows of b that satisfy the filter
 * expressions in constr.
 *
 * The result always starts as fs_binding_copy(b). With constr == NULL that
 * copy is returned untouched. Otherwise every column of the copy is emptied
 * and each row of b is tested: a row is kept, exactly once, when every
 * non-NULL expression in constr has an effective boolean value of true.
 * An expression whose EBV is a type error counts as failing; if the evaluated
 * value itself is a type error carrying a message (v.lex), that message is
 * added to q->warnings. Only bound columns of b receive values in the result.
 *
 * q->bt is pointed at b while the expressions are evaluated and is restored
 * before returning.
 *
 * NOTE(review): when constr is non-NULL but holds no non-NULL expression, no
 * rows are kept. That matches what this function did before the per-row
 * conjunction fix; confirm with the callers whether "everything optimised
 * out" should keep every row instead.
 */
fs_binding *fs_binding_apply_filters(fs_query *q, int block, fs_binding *b, raptor_sequence *constr)
{
    fs_binding *ret = fs_binding_copy(b);
    if (!constr) {
        /* if there are no constraints then we don't need to do anything */
        return ret;
    }
    for (int col=0; b[col].name; col++) {
        ret[col].vals->length = 0;
    }
    int length = fs_binding_length(b);
    fs_binding *restore = q->bt;
    q->bt = b;
    /* TODO should prefetch lexical vals here */
    /* expressions that have been optimised out will be replaced with NULL,
     * so we have to be careful here */
/* --------------------------- */
/* PREFETCH should go here XXX */
/* --------------------------- */
    for (int row=0; row<length; row++) {
        int evaluated = 0;  /* saw at least one non-NULL expression */
        int passed = 1;     /* every evaluated expression was true so far */

        /* Every expression is evaluated even after one has failed, so the
         * set of warnings collected does not depend on expression order. */
        for (int c=0; c<raptor_sequence_size(constr); c++) {
            rasqal_expression *e =
                raptor_sequence_get_at(constr, c);
            if (!e) continue;
            evaluated = 1;

            fs_value v = fs_expression_eval(q, row, block, e);
#ifdef DEBUG_FILTER
            rasqal_expression_print(e, stdout);
            printf(" -> ");
            fs_value_print(v);
            printf("\n");
#endif
            if (v.valid & fs_valid_bit(FS_V_TYPE_ERROR) && v.lex) {
                q->warnings = g_slist_prepend(q->warnings, v.lex);
            }
            fs_value result = fn_ebv(v);
            /* its EBV is not true, so the row fails this filter */
            if (result.valid & fs_valid_bit(FS_V_TYPE_ERROR) || !result.in) {
                passed = 0;
            }
        }

        if (!evaluated || !passed) {
            continue;
        }

        /* the row satisfied all filters: copy it once */
        for (int col=0; b[col].name; col++) {
            if (b[col].bound) {
                fs_rid_vector_append(ret[col].vals, b[col].vals->data[row]);
            }
        }
    }
    q->bt = restore;

    return ret;
}
Esempio n. 3
0
/*
 * Drop rows of bi that compare equal to the row immediately before them.
 *
 * Only adjacent rows are compared, so duplicates are removed reliably only
 * if equal rows are already next to each other (presumably the caller sorts
 * first -- confirm). The rows are taken from the table returned by
 * fs_binding_copy_and_clear(bi) and written back into bi; every bound column
 * of that copy is flagged as a sort column for the comparison.
 */
void fs_binding_uniq(fs_binding *bi)
{
    /* with fewer than two rows there is nothing to compare; the code below
     * also needs a row 0 to seed the output */
    if (fs_binding_length(bi) < 2) {
        return;
    }

    fs_binding *src = fs_binding_copy_and_clear(bi);

    bi[0].vals->length = 0;

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif
    int nrows = fs_binding_length(src);
    int outrow = 1;

    /* the first row is always kept; set up the column flags as we go */
    for (int col = 1; src[col].name; col++) {
        fs_rid_vector_append(bi[col].vals, table_value(src, col, 0));
        bi[col].bound = src[col].bound;
        src[col].sort = src[col].bound;
    }

    /* keep every later row that differs from its predecessor */
    for (int row = 1; row < nrows; row++) {
        if (binding_row_compare(NULL, src, src, row, row-1, nrows, nrows) != 0) {
            for (int col = 1; src[col].name; col++) {
                fs_rid_vector_append(bi[col].vals, table_value(src, col, row));
            }
            outrow++;
        }
    }

#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("uniq took %fs (%d->%d rows)\n", now-then, nrows, outrow);
    fs_binding_print(bi, stdout);
#endif
    fs_binding_free(src);
}
Esempio n. 4
0
/*
 * Report whether literal l counts as bound with respect to binding table b.
 *
 * Returns 1 only for a variable literal, and then only when either the table
 * has no rows at all (fs_binding_length(b) == 0) or the variable's entry is
 * found in b with bound == 1. A NULL literal and every non-variable literal
 * type yield 0.
 */
int fs_opt_is_bound(fs_binding *b, rasqal_literal *l)
{
    if (!l || l->type != RASQAL_LITERAL_VARIABLE) {
        return 0;
    }

    /* an empty table: nothing is bound, which is reported as true */
    if (fs_binding_length(b) == 0) {
        return 1;
    }

    fs_binding *entry = fs_binding_get(b, l->value.variable);

    return (entry && entry->bound == 1) ? 1 : 0;
}
Esempio n. 5
0
/*
 * Sort binding table b on the columns whose .sort flag is set.
 *
 * The value columns themselves are not reordered. Column 0 is filled with
 * the row indices 0 .. length-1 and that index vector is what gets sorted
 * (fs_qsort_r with qsort_r_cmp, which receives b through a sort_context).
 * Before that, every column shorter than the table length is padded with
 * FS_RID_NULL. If no column is flagged for sorting a warning is logged and
 * column 0 is left untouched (the padding has already happened by then).
 */
void fs_binding_sort(fs_binding *b)
{
    const int nrows = fs_binding_length(b);
    int sort_cols = 0;

    for (int col = 0; b[col].name; col++) {
        if (b[col].sort) {
            sort_cols++;
        }
        /* bring short columns up to the full row count */
        for (int r = b[col].vals->length; r < nrows; r++) {
            fs_rid_vector_append(b[col].vals, FS_RID_NULL);
        }
    }

    if (sort_cols == 0) {
        fs_error(LOG_WARNING, "fs_binding_sort() called with no sort "
                              "columns set, ignoring");

        return;
    }

    /* fill out the _ord column with the integers in [0,n) */
    b[0].vals->length = 0;
    for (int r = 0; r < nrows; r++) {
        fs_rid_vector_append(b[0].vals, r);
    }

    /* zero or one row is already in order */
    if (nrows <= 1) {
        return;
    }

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif

    /* ctxt could include other stuff for optimisations */
    struct sort_context ctxt = { b };
    fs_qsort_r(b[0].vals->data, nrows, sizeof(fs_rid), qsort_r_cmp, &ctxt);

#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("sort took %f seconds\n", now - then);
#endif
}
Esempio n. 6
0
/*
 * Execute the single update operation held in uc->op.
 *
 * CLEAR and DROP both call fs_clear() on the target graph, CREATE does
 * nothing, and LOAD/ADD/MOVE/COPY are forwarded to the matching fs_* helper.
 * For RASQAL_UPDATE_TYPE_UPDATE (INSERT/DELETE) the work is done here:
 *
 *  - with a WHERE clause (Rasqal >= 0.9.23 only) the pattern is evaluated in
 *    a temporary fs_query; templates that contain variables are then applied
 *    once per result row, templates without variables are applied once as
 *    constant triples;
 *  - without a WHERE clause the delete/insert templates are applied as
 *    constant triples. A DELETE whose templates contain variables but has no
 *    WHERE clause is rejected.
 *
 * Returns 0 on success and 1 when the operation is rejected or cannot be
 * carried out; in that case a message has been recorded via add_message().
 */
static int update_op(struct update_context *uc)
{
    fs_rid_vector *vec[4];
    switch (uc->op->type) {
    case RASQAL_UPDATE_TYPE_UNKNOWN:
        add_message(uc, "Unknown update operation", 0);
        return 1;
    case RASQAL_UPDATE_TYPE_CLEAR:
        fs_clear(uc, graph_arg(uc->op->graph_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_CREATE:
        return 0;
    case RASQAL_UPDATE_TYPE_DROP:
        fs_clear(uc, graph_arg(uc->op->graph_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_LOAD:
        fs_load(uc, graph_arg(uc->op->document_uri),
                    graph_arg(uc->op->graph_uri));
        return 0;
#if RASQAL_VERSION >= 924
    case RASQAL_UPDATE_TYPE_ADD:
        fs_add(uc, graph_arg(uc->op->graph_uri),
                   graph_arg(uc->op->document_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_MOVE:
        fs_move(uc, graph_arg(uc->op->graph_uri),
                    graph_arg(uc->op->document_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_COPY:
        fs_copy(uc, graph_arg(uc->op->graph_uri),
                    graph_arg(uc->op->document_uri));
        return 0;
#endif
    case RASQAL_UPDATE_TYPE_UPDATE:
        break;
    }

    fs_hash_freshen();

    raptor_sequence *todel = NULL;
    raptor_sequence *toins = NULL;
    /* non-zero when todel/toins were allocated here rather than borrowed
     * from uc->op, and therefore have to be freed before returning */
    int owns_const_seqs = 0;

    if (uc->op->delete_templates && !uc->op->where) {
        int where = 0;

        /* check to see if it's a DELETE WHERE { } */
        for (int t=0; t<raptor_sequence_size(uc->op->delete_templates); t++) {
            rasqal_triple *tr = raptor_sequence_get_at(uc->op->delete_templates, t);
            if (any_vars(tr)) {
                where = 1;
                break;
            }
        }
        if (where) {
            fs_error(LOG_ERR, "DELETE WHERE { x } not yet supported");
            add_message(uc, "DELETE WHERE { x } not yet supported, use DELETE { x } WHERE { x }", 0);

            return 1;
        }
    }

#if RASQAL_VERSION >= 923
    if (uc->op->where) {
        fs_query *q = calloc(1, sizeof *q);
        if (!q) {
            fs_error(LOG_ERR, "out of memory allocating query for update");
            add_message(uc, "Out of memory while processing update", 0);

            return 1;
        }

        /* The sequences are created without a free handler, so they only
         * reference the triples/variables and never own them. */
        todel = raptor_new_sequence(NULL, NULL);
        toins = raptor_new_sequence(NULL, NULL);
        owns_const_seqs = 1;
        raptor_sequence *todel_p = raptor_new_sequence(NULL, NULL);
        raptor_sequence *toins_p = raptor_new_sequence(NULL, NULL);
        raptor_sequence *vars = raptor_new_sequence(NULL, NULL);

        uc->q = q;
        q->qs = uc->qs;
        q->rq = uc->rq;
        q->flags = FS_BIND_DISTINCT;
#ifdef DEBUG_MERGE
        q->flags |= FS_QUERY_CONSOLE_OUTPUT;
#endif
        q->boolean = 1;
        q->opt_level = 3;
        q->soft_limit = -1;
        q->segments = fsp_link_segments(uc->link);
        q->link = uc->link;
        q->bb[0] = fs_binding_new();
        q->bt = q->bb[0];

        /* hashtable to hold runtime created resources */
        q->tmp_resources = g_hash_table_new_full(fs_rid_hash, fs_rid_equal, g_free, fs_free_cached_resource);

        /* add column to denote join ordering */
        fs_binding_create(q->bb[0], "_ord", FS_RID_NULL, 0);

        /* split the templates: those with variables are applied per result
         * row (todel_p/toins_p), the others once as constants (todel/toins) */
        if (uc->op->delete_templates) {
            for (int t=0; t<raptor_sequence_size(uc->op->delete_templates); t++) {
                rasqal_triple *tr = raptor_sequence_get_at(uc->op->delete_templates, t);
                if (any_vars(tr)) {
                    fs_check_cons_slot(q, vars, tr->subject);
                    fs_check_cons_slot(q, vars, tr->predicate);
                    fs_check_cons_slot(q, vars, tr->object);
                    raptor_sequence_push(todel_p, tr);
                } else {
                    raptor_sequence_push(todel, tr);
                }
            }
        }

        if (uc->op->insert_templates) {
            for (int t=0; t<raptor_sequence_size(uc->op->insert_templates); t++) {
                rasqal_triple *tr = raptor_sequence_get_at(uc->op->insert_templates, t);
                if (any_vars(tr)) {
                    fs_check_cons_slot(q, vars, tr->subject);
                    fs_check_cons_slot(q, vars, tr->predicate);
                    fs_check_cons_slot(q, vars, tr->object);
                    raptor_sequence_push(toins_p, tr);
                } else {
                    raptor_sequence_push(toins, tr);
                }
            }
        }

        q->num_vars = raptor_sequence_size(vars);

        for (int i=0; i < q->num_vars; i++) {
            rasqal_variable *v = raptor_sequence_get_at(vars, i);
            fs_binding_add(q->bb[0], v, FS_RID_NULL, 1);
        }

        /* perform the WHERE match */
        fs_query_process_pattern(q, uc->op->where, vars);

        q->length = fs_binding_length(q->bb[0]);

        for (int s=0; s<4; s++) {
            vec[s] = fs_rid_vector_new(0);
        }
        for (int t=0; t<raptor_sequence_size(todel_p); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(todel_p, t);
            for (int row=0; row < q->length; row++) {
                delete_rasqal_triple(uc, vec, triple, row);
            }
            /* NOTE(review): the vectors are not emptied here after an early
             * flush; confirm that fsp_delete_quads_all() resets them,
             * otherwise the same quads are sent again by later flushes. */
            if (fs_rid_vector_length(vec[0]) > 1000) {
                fsp_delete_quads_all(uc->link, vec);
            }
        }
        if (fs_rid_vector_length(vec[0]) > 0) {
            fsp_delete_quads_all(uc->link, vec);
        }
        for (int s=0; s<4; s++) {
            fs_rid_vector_free(vec[s]);
            vec[s] = NULL;
        }

        for (int t=0; t<raptor_sequence_size(toins_p); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(toins_p, t);
            for (int row=0; row < q->length; row++) {
                insert_rasqal_triple(uc, triple, row);
            }
        }

        /* must not free the rasqal_query */
        q->rq = NULL;
        fs_query_free(q);
        uc->q = NULL;

        /* the per-row template lists are purely local to this branch */
        raptor_free_sequence(todel_p);
        raptor_free_sequence(toins_p);
        /* NOTE(review): vars was handed to fs_query_process_pattern(); it is
         * left alone here because it is not visible from this function
         * whether the query keeps (and frees) a reference to it. */
    } else {
        todel = uc->op->delete_templates;
        toins = uc->op->insert_templates;
    }
#else
    if (uc->op->where) {
        fs_error(LOG_ERR, "DELETE/INSERT WHERE requires Rasqal 0.9.23 or newer");
        add_message(uc, "DELETE/INSERT WHERE requires Rasqal 0.9.23 or newer", 0);

        /* report the failure instead of silently claiming success */
        return 1;
    }
    /* no WHERE clause: the templates are applied as constant triples */
    todel = uc->op->delete_templates;
    toins = uc->op->insert_templates;
#endif

    /* delete constant triples */
    if (todel) {
        for (int s=0; s<4; s++) {
            vec[s] = fs_rid_vector_new(0);
        }
        for (int t=0; t<raptor_sequence_size(todel); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(todel, t);
            if (any_vars(triple)) {
                continue;
            }
            delete_rasqal_triple(uc, vec, triple, 0);
        }
        if (fs_rid_vector_length(vec[0]) > 0) {
            fsp_delete_quads_all(uc->link, vec);
        }
        for (int s=0; s<4; s++) {
            fs_rid_vector_free(vec[s]);
            vec[s] = NULL;
        }
    }

    /* insert constant triples */
    if (toins) {
        for (int t=0; t<raptor_sequence_size(toins); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(toins, t);
            if (any_vars(triple)) {
                continue;
            }
            insert_rasqal_triple(uc, triple, 0);
        }
    }

    /* release the constant-triple lists if they were allocated above; the
     * borrowed uc->op template sequences must not be freed */
    if (owns_const_seqs) {
        raptor_free_sequence(todel);
        raptor_free_sequence(toins);
    }

    fs_hash_freshen();

    return 0;
}
Esempio n. 7
0
/*
 * Estimate a frequency figure for triple pattern t in the given block
 * (presumably used by the optimiser to order patterns -- confirm with the
 * callers).
 *
 * The rules are tried in order, first match wins:
 *  - none of subject/predicate/object/origin is constant: INT_MAX;
 *  - neither subject nor object is constant: INT_MAX - 100;
 *  - frequency data is available and the subject (resp. object) has exactly
 *    one value: the calc_freq() result for it, taking the predicate into
 *    account when that also has exactly one value. Object-side lookups get
 *    q->segments * 50 added;
 *  - no usable frequency data: the product of the candidate value counts of
 *    the subject (resp. object) and the predicate, scaled by the binding
 *    table length * 100 when fs_opt_is_bound() is false for all of subject,
 *    predicate and object;
 *  - otherwise the default of 100.
 *
 * NOTE(review): the scaling multiplication is done in int and is not guarded
 * against overflow.
 */
int fs_bind_freq(fs_query_state *qs, fs_query *q, int block, rasqal_triple *t)
{
    fs_binding *b = q->bb[block];
    int ret = 100;
#if DEBUG_OPTIMISER
    char dir = 'X';
#endif

    if (!fs_opt_is_const(b, t->subject) && !fs_opt_is_const(b, t->predicate) &&
            !fs_opt_is_const(b, t->object) && !fs_opt_is_const(b, t->origin)) {
#if DEBUG_OPTIMISER
        dir = '?';
#endif
        ret = INT_MAX;
    } else if (!fs_opt_is_const(b, t->subject) &&
               !fs_opt_is_const(b, t->object)) {
#if DEBUG_OPTIMISER
        dir = '?';
#endif
        ret = INT_MAX - 100;
    } else if (qs->freq_s && fs_opt_num_vals(b, t->subject) == 1 &&
               fs_opt_num_vals(b, t->predicate) == 1) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = calc_freq(q, block, qs->freq_s, t->subject, t->predicate);
    } else if (qs->freq_o && fs_opt_num_vals(b, t->object) == 1 &&
               fs_opt_num_vals(b, t->predicate) == 1) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        ret = calc_freq(q, block, qs->freq_o, t->object, t->predicate) +
              q->segments * 50;
    } else if (qs->freq_s && fs_opt_num_vals(b, t->subject) == 1) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = calc_freq(q, block, qs->freq_s, t->subject, NULL);
    } else if (qs->freq_o && fs_opt_num_vals(b, t->object) == 1) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        /* look the object up in the object table: this branch is guarded by
         * freq_o, and freq_s may well be NULL here */
        ret = calc_freq(q, block, qs->freq_o, t->object, NULL) +
              q->segments * 50;
        /* clauses for if we have no freq data */
    } else if (fs_opt_num_vals(b, t->subject) < 1000000 &&
               fs_opt_num_vals(b, t->predicate) < 100 &&
               fs_opt_num_vals(b, t->object) == INT_MAX) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = fs_opt_num_vals(b, t->subject) * fs_opt_num_vals(b, t->predicate);
        if (!fs_opt_is_bound(b, t->subject) &&
                !fs_opt_is_bound(b, t->predicate) &&
                !fs_opt_is_bound(b, t->object)) {
            ret *= (fs_binding_length(b) * 100);
        }
    } else if (fs_opt_num_vals(b, t->object) < 1000000 &&
               fs_opt_num_vals(b, t->predicate) < 100 &&
               fs_opt_num_vals(b, t->subject) == INT_MAX) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        ret = fs_opt_num_vals(b, t->predicate) * fs_opt_num_vals(b, t->object);
        if (!fs_opt_is_bound(b, t->subject) &&
                !fs_opt_is_bound(b, t->predicate) &&
                !fs_opt_is_bound(b, t->object)) {
            ret *= (fs_binding_length(b) * 100);
        }
    }

#if DEBUG_OPTIMISER
    if (q->flags & FS_QUERY_EXPLAIN) {
        printf("freq(%c, ", dir);
        rasqal_triple_print(t, stdout);
        printf(") = %d\n", ret);
    }
#endif

    return ret;
}
Esempio n. 8
0
/*
 * Join binding tables a and b into a new table, which is returned.
 *
 * If either input is NULL, a copy of the other one is returned. Otherwise
 * the result starts as a copy of a with every column emptied, and a column
 * is marked bound in the result when it is bound in either input.
 *
 *  - If no column is bound in both inputs, the result is a's rows followed
 *    by b's rows, with FS_RID_NULL in the columns a side does not bind.
 *  - Otherwise the columns bound on both sides are flagged as sort columns,
 *    both inputs are sorted with fs_binding_sort() and merged linearly.
 *    Rows that compare as matching (0, or -2/2 when one side is null) are
 *    combined pairwise across their runs of equal rows. For FS_LEFT joins an
 *    a-row with no partner is emitted with its own values and FS_RID_NULL in
 *    columns a does not bind; for FS_INNER the merge stops once b is
 *    exhausted.
 *
 * Only columns with need_val set in the result receive values during the
 * merge. The inputs are modified: their sort flags are rewritten, they are
 * sorted, their column 0 is cleared afterwards, and in restricted mode
 * (FS_QUERY_RESTRICTED) they are truncated to q->soft_limit with a warning
 * added to q->warnings if anything was dropped.
 *
 * NOTE(review): that warning is a string literal while other code paths
 * store heap strings in q->warnings; confirm how the list is released.
 */
fs_binding *fs_binding_join(fs_query *q, fs_binding *a, fs_binding *b, fs_join_type join)
{
    if (a == NULL) {
        return fs_binding_copy(b);
    }
    if (b == NULL) {
        return fs_binding_copy(a);
    }

    fs_binding *c = fs_binding_copy(a);
    int inter = 0;      /* do the tables intersect */

    for (int i=0; a[i].name; i++) {
        a[i].sort = 0;
        b[i].sort = 0;
        c[i].sort = 0;
        c[i].vals->length = 0;
    }
    for (int i=1; a[i].name; i++) {
        if (a[i].bound || b[i].bound) {
            c[i].bound = 1;
        }

        if (a[i].bound && b[i].bound) {
            inter = 1;
            a[i].sort = 1;
            b[i].sort = 1;
#ifdef DEBUG_MERGE
            printf("joining on %s\n", a[i].name);
#endif
        }
    }

    /* a and b bound variables do not intersect, we can just dump results */
    if (!inter) {
        int length_a = fs_binding_length(a);
        int length_b = fs_binding_length(b);
        for (int i=1; a[i].name; i++) {
            if (!a[i].bound) {
                for (int j=0; j<length_a; j++) {
                    fs_rid_vector_append(c[i].vals, FS_RID_NULL);
                }
            } else {
                fs_rid_vector_append_vector(c[i].vals, a[i].vals);
            }
            if (!b[i].bound) {
                for (int j=0; j<length_b; j++) {
                    fs_rid_vector_append(c[i].vals, FS_RID_NULL);
                }
            } else {
                fs_rid_vector_append_vector(c[i].vals, b[i].vals);
            }
        }
#ifdef DEBUG_MERGE
        printf("append all, result:\n");
        fs_binding_print(c, stdout);
#endif
        return c;
    }

    int length_a = fs_binding_length(a);
    int length_b = fs_binding_length(b);

    /* sort the two sets of bindings so they can be merged linearly */
    fs_binding_sort(a);
    fs_binding_sort(b);

#ifdef DEBUG_MERGE
    printf("a: %d bindings\n", fs_binding_length(a));
    fs_binding_print(a, stdout);
    printf("b: %d bindings\n", fs_binding_length(b));
    fs_binding_print(b, stdout);
#endif

    /* If we're running in restricted mode, truncate the binding tables */
    if (q->flags & FS_QUERY_RESTRICTED) {
        int restricted = 0;
        fs_binding_truncate(a, q->soft_limit);
        if (length_a > fs_binding_length(a)) {
            length_a = fs_binding_length(a);
            restricted = 1;
        }
        fs_binding_truncate(b, q->soft_limit);
        if (length_b > fs_binding_length(b)) {
            length_b = fs_binding_length(b);
            restricted = 1;
        }
        if (restricted) {
            char *msg = "some results have been dropped to prevent overunning effort allocation";
            q->warnings = g_slist_prepend(q->warnings, msg);
        }
    }

    int apos = 0;
    int bpos = 0;
    int cmp;
    while (apos < length_a) {
        if (join == FS_INNER && bpos >= length_b) break;
        cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b);
        if (cmp == -1) {
            /* A and B aren't compatible, A sorts lower, skip A or left join */
#if DEBUG_MERGE > 1
            printf("[L] Ar=%d, Br=%d", apos, bpos);
#endif
            if (join == FS_LEFT) {
                for (int col=0; a[col].name; col++) {
                    if (!c[col].need_val) {
                        continue;
                    } else if (a[col].bound) {
#if DEBUG_MERGE > 1
                        printf(" %s=%016llx", c[col].name, table_value(a, col, apos));
#endif
                        fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                    } else {
#if DEBUG_MERGE > 1
                        printf(" %s=null", c[col].name);
#endif
                        fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                    }
                }
            }
            apos++;
        } else if (cmp == 0 || cmp == -2 || cmp == 2) {
            /* Both rows are equal (cmp == 0), or one row is null (cmp == -2, 2) */
            /* Both rows match, find out what combinations bind and produce them */
#if DEBUG_MERGE > 1
            printf("[I] Ar=%d, Br=%d", apos, bpos);
#endif
            /* find the end of the run of equal rows on each side */
            int range_a = apos+1;
            int range_b = bpos+1;
            while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++;
            while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++;
            int start_a = apos;
            int start_b = bpos;
            /* emit the cross product of the two runs */
            for (apos = start_a; apos<range_a; apos++) {
                for (bpos = start_b; bpos<range_b; bpos++) {
                    for (int col=0; a[col].name; col++) {
                        if (!c[col].need_val) {
                            continue;
                        } else if (!a[col].bound && !b[col].bound) {
#if DEBUG_MERGE > 1
                            printf(" %s=null", c[col].name);
#endif
                            fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                        } else if (a[col].bound) {
                            /* if we're left joining and A is NULL, we want the
                             * value from B */
                            if (join == FS_LEFT && table_value(a, col, apos) == FS_RID_NULL && b[col].bound) {
#if DEBUG_MERGE > 1
                                printf(" %s=%016llx", c[col].name, table_value(b, col, bpos));
#endif
                                fs_rid_vector_append(c[col].vals, table_value(b, col, bpos));
                            } else {
#if DEBUG_MERGE > 1
                                printf(" %s=%016llx", c[col].name, table_value(a, col, apos));
#endif
                                fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                            }
                        } else {
#if DEBUG_MERGE > 1
                            printf(" %s=%016llx", c[col].name, table_value(b, col, bpos));
#endif
                            fs_rid_vector_append(c[col].vals, table_value(b, col, bpos));
                        }
                    }
                }
            }
            /* this is actually unnecessary because the for loop will do the
             * same thing, but it's clearer */
            apos = range_a;
            bpos = range_b;
        } else if (cmp == +1) {
            /* A and B aren't compatible, B sorts lower, skip B */
            bpos++;
        } else {
            fs_error(LOG_ERR, "cmp=%d, value out of range", cmp);
            /* neither cursor advances in this case, so carrying on would
             * loop forever; give up and return what has been produced */
            break;
        }
#if DEBUG_MERGE > 1
        printf("\n");
#endif
    }

    /* clear the _ord columns */
    a[0].vals->length = 0;
    b[0].vals->length = 0;

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(c));
    fs_binding_print(c, stdout);
#endif

    return c;
}
Esempio n. 9
0
/*
 * return to = from [X] to, this is used to perform joins inside blocks, it
 * saves allocations by doing most operations inplace, unlike fs_binding_join
 *
 * Columns bound in both tables become the join (sort) columns.
 *
 *  - If there is no such column and from has no rows, from's columns are
 *    simply appended onto to (padding with FS_RID_NULL where only one side
 *    binds a column) and the function returns.
 *  - Otherwise, columns that only from binds are created in to as
 *    FS_RID_NULL placeholders and remembered; both tables are sorted when a
 *    join column exists; and the tables are walked in step. For matching
 *    rows, the first matching from-row overwrites values in the existing
 *    to-row and further matching from-rows append new rows to to. Finally,
 *    original to-rows in which a placeholder column is still FS_RID_NULL are
 *    dropped.
 *
 * In restricted mode (FS_QUERY_RESTRICTED) both tables are truncated to
 * q->soft_limit first, and the walk stops with a warning in q->warnings once
 * to has reached that limit. Both inputs are modified. The block argument is
 * not used.
 *
 * NOTE(review): the final clean-up copies only rows 0 .. length_t-1 (the
 * row count of to before the walk) and relies on a freshly created bit
 * array reporting every row as set; confirm both against fs_new_bit_array()
 * and against rows appended during the walk.
 */
void fs_binding_merge(fs_query *q, int block, fs_binding *from, fs_binding *to)
{
    fs_binding *inter_f = NULL; /* the intersecting column */
    fs_binding *inter_t = NULL; /* the intersecting column */

    for (int i=0; from[i].name; i++) {
        from[i].sort = 0;
        to[i].sort = 0;
    }
    for (int i=1; from[i].name; i++) {
        if (!from[i].bound || !to[i].bound) continue;

        /* bound on both sides: join (and sort) on this column */
        inter_f = from+i;
        inter_t = to+i;
        from[i].sort = 1;
        to[i].sort = 1;
#ifdef DEBUG_MERGE
        printf("@@ join on %s\n", to[i].name);
#endif
    }

    /* from and to bound variables do not intersect, we can just dump results,
       under some circumstances we need to do a combinatorial explosion */
    if (!inter_f && (fs_binding_length(from) == 0)) {
        const int length_f = fs_binding_length(from);
        const int length_t = fs_binding_length(to);
        for (int i=1; from[i].name; i++) {
            if (to[i].bound && !from[i].bound) {
                if (from[i].vals) {
                    fs_rid_vector_free(from[i].vals);
                }
                from[i].vals = fs_rid_vector_new(length_f);
                for (int d=0; d<length_f; d++) {
                    from[i].vals->data[d] = FS_RID_NULL;
                }
                from[i].bound = 1;
            }
            if (!from[i].bound) continue;
            if (!to[i].bound) {
                if (to[i].vals) {
                    fs_rid_vector_free(to[i].vals);
                }
                to[i].vals = fs_rid_vector_new(length_t);
                for (int d=0; d<length_t; d++) {
                    to[i].vals->data[d] = FS_RID_NULL;
                }
            }
            fs_rid_vector_append_vector(to[i].vals, from[i].vals);
            to[i].bound = 1;
        }
#ifdef DEBUG_MERGE
        printf("append all, result:\n");
        fs_binding_print(to, stdout);
#endif

        return;
    }

    /* If we're running in restricted mode, truncate the binding tables */
    if (q->flags & FS_QUERY_RESTRICTED) {
        fs_binding_truncate(from, q->soft_limit);
        fs_binding_truncate(to, q->soft_limit);
    }

    int length_t = fs_binding_length(to);
    int length_f = fs_binding_length(from);
    /* ms8: this list keeps track of the vars to replace */
    GList *rep_list = NULL;
    for (int i=1; to[i].name; i++) {
        if (to+i == inter_t || to[i].used || to[i].bound) {
            /* do nothing */
#if DEBUG_MERGE > 1
            printf("@@ preserve %s\n", to[i].name);
#endif
        } else if (from[i].bound && !to[i].bound) {
#if DEBUG_MERGE > 1
            printf("@@ replace %s\n", from[i].name);
#endif
            /* only from binds this column: create it in to, filled with
             * NULL placeholders that the merge below overwrites */
            to[i].bound = 1;
            if (to[i].vals) {
                if (to[i].vals->length != length_t) {
                    fs_rid_vector_free(to[i].vals);
                    to[i].vals = fs_rid_vector_new(length_t);
                }
            } else {
                to[i].vals = fs_rid_vector_new(length_t);
            }
            for (int d=0; d<length_t; d++) {
                to[i].vals->data[d] = FS_RID_NULL;
            }
            rep_list = g_list_append(rep_list, GINT_TO_POINTER(i));
        }
    }

    /* sort the two sets of bindings so they can be merged linearly */
    if (inter_f) {
        fs_binding_sort(from);
        fs_binding_sort(to);
    } else {
        /* make sure the tables are not marked sorted */
        from[0].vals->length = 0;
        to[0].vals->length = 0;
    }

#ifdef DEBUG_MERGE
    printf("old: %d bindings\n", fs_binding_length(from));
    fs_binding_print(from, stdout);
    printf("new: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif

    int fpos = 0;
    int tpos = 0;
    while (fpos < length_f || tpos < length_t) {
        if (q->flags & FS_QUERY_RESTRICTED &&
            fs_binding_length(to) >= q->soft_limit) {
            char *msg = g_strdup("some results have been dropped to prevent overunning time allocation");
            q->warnings = g_slist_prepend(q->warnings, msg);
            break;
        }
        int cmp;
        cmp = binding_row_compare(q, from, to, fpos, tpos, length_f, length_t);
        if (cmp == 0) {
            /* both rows match */
            int fp, tp = tpos;
            for (fp = fpos; binding_row_compare(q, from, to, fp, tpos, length_f, length_t) == 0; fp++) {
#if DEBUG_MERGE > 1
                if (fp == DEBUG_CUTOFF) {
                    printf("...\n");
                }
#endif
                for (tp = tpos; 1; tp++) {
                    if (binding_row_compare(q, from, to, fp, tp, length_f, length_t) == 0) {
#if DEBUG_MERGE > 1
                        if (fp < DEBUG_CUTOFF) {
                            printf("STEP %d, %d  ", fp-fpos, tp-tpos);
                        }
#endif
                        if (fp == fpos) {
                            /* first matching from-row: write its values
                             * into the existing to-row */
#if DEBUG_MERGE > 1
                            if (fp < DEBUG_CUTOFF) {
                                if (inter_f) {
                                    printf("REPL %llx\n", inter_f->vals->data[fp]);
                                } else {
                                    printf("REPL ???\n");
                                }
                            }
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && table_value(from, c, fp) == FS_RID_NULL) {
                                    continue;
                                }
                                if (from[c].bound && fp < from[c].vals->length) {
                                    /* when to is sorted, column 0 maps the
                                     * sorted position to the physical row */
                                    long wrow = to[0].vals->length ? to[0].vals->data[tp] : tp;
                                    to[c].vals->data[wrow] = table_value(from, c, fp);
                                    if (to[c].vals->length <= tp) {
                                        to[c].vals->length = tp+1;
                                    }
                                }
                            }
                        } else {
                            /* further matching from-rows: append a new row */
#if DEBUG_MERGE > 1
                            if (fp < DEBUG_CUTOFF) {
                                printf("ADD\n");
                            }
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && fp < from[c].vals->length) {
                                    fs_rid_vector_append(to[c].vals, table_value(from, c, fp));
                                } else {
                                    fs_rid_vector_append(to[c].vals, table_value(to, c, tp));
                                }
                            }
                        }
                    } else {
                        break;
                    }
                }
            }
            tpos = tp;
            fpos = fp;
        } else if (cmp <= -1) {
            fpos++;
        } else if (cmp >= 1) {
            tpos++;
        } else {
            fs_error(LOG_CRIT, "unknown compare state %d in binding", cmp);
        }
    }

    /* clear the _ord columns */
    from[0].vals->length = 0;
    to[0].vals->length = 0;

    /* ms8: INIT code to clean up rows that were not replaced */
    if (rep_list) {
        unsigned char *to_del = fs_new_bit_array(length_t);
        int to_del_count = 0;
        /* walk the list with a cursor so that the head is still available
         * for g_list_free() afterwards */
        for (GList *it = rep_list; it; it = g_list_next(it)) {
            int col_r = GPOINTER_TO_INT(it->data);
            for (int d=0; d<length_t; d++) {
                if (to[col_r].vals->data[d] == FS_RID_NULL) {
                    fs_bit_array_set(to_del, d, 0);
                    to_del_count++;
                }
            }
        }
        g_list_free(rep_list);
        rep_list = NULL;
        if (to_del_count) {
            int vars = 0;
            for (int i=1; to[i].name; i++) {
                vars++;
            }
            fs_rid_vector **clean = calloc(vars, sizeof(fs_rid_vector *));
            for (int i=0; i<vars; i++) {
                clean[i] = fs_rid_vector_new(0);
            }
            /* copy the rows whose bit is still set */
            for (int d=0; d<length_t; d++) {
                if (fs_bit_array_get(to_del, d)) {
                    for (int i=0; i<vars; i++) {
                        fs_rid_vector_append(clean[i], to[i+1].vals->data[d]);
                    }
                }
            }
            /* move the cleaned storage into to's vectors, releasing the old
             * data arrays and the now empty vector shells */
            for (int i=1; i<=vars; i++) {
                free(to[i].vals->data);
                to[i].vals->data = clean[i-1]->data;
                to[i].vals->length = clean[i-1]->length;
                to[i].vals->size = clean[i-1]->size;
                free(clean[i-1]);
            }
            free(clean);
        }
        fs_bit_array_destroy(to_del);
    }
    /* ms8: END code to clean up rows that were not replaced */

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif
}
Esempio n. 10
0
/*
 * Write a human readable dump of binding table b to out.
 *
 * Output is two header lines (column names, then per-column flags with the
 * appears/depends counters) followed by one line per row. When column 0 is
 * non-empty it is treated as an ordering: an extra "order" field is printed
 * and values are read from the row that column 0 names. Bound columns print
 * as 16 hex digits, or "null" for FS_RID_NULL; unbound columns print "null".
 * Unless DEBUG_MERGE is defined as 3 or more, tables longer than 25 rows are
 * abbreviated after DEBUG_CUTOFF rows with " ..." and the final rows.
 */
void fs_binding_print(fs_binding *b, FILE *out)
{
    const int length = fs_binding_length(b);
    const int has_order = b[0].vals->length != 0;

    /* header line 1: column names */
    fputs("    ", out);
    if (has_order) {
        fputs("      ", out);
    }
    for (int col = 1; b[col].name; col++) {
        if (!b[col].bound) {
            fprintf(out, " %12.12s", b[col].name);
        } else {
            fprintf(out, " %16.16s", b[col].name);
        }
    }
    fputc('\n', out);

    /* header line 2: per column flags and counters */
    fputs(" row", out);
    if (has_order) {
        fputs(" order", out);
    }
    for (int col = 1; b[col].name; col++) {
        const char proj_flag = b[col].proj ? 'p' : '-';
        const char used_flag = b[col].used ? 'u' : '-';
        const char need_flag = b[col].need_val ? 'n' : '-';

        if (!b[col].bound) {
            fprintf(out, "  %c%c%c A%02d D%02d",
                    proj_flag, used_flag, need_flag,
                    b[col].appears, b[col].depends);
        } else {
            fprintf(out, "     %c%c%c%c A%02d D%02d",
                    proj_flag, used_flag, need_flag,
                    b[col].bound ? 'b' : '-',
                    b[col].appears, b[col].depends);
        }
    }
    fputc('\n', out);

    /* one line per row */
    for (long int line = 0; line < length; line++) {
        /* physical row to read: taken from column 0 when it is in use */
        long int row = line;
        if (has_order) {
            row = b[0].vals->data[line];
        }

        fprintf(out, "%4ld", line);
        if (has_order) {
            fprintf(out, " %5ld", row);
        }

        for (int col = 1; b[col].name; col++) {
            if (!b[col].bound) {
                fprintf(out, "%13s", "null");
                continue;
            }
            if (row < b[col].vals->length && b[col].vals->data[row] == FS_RID_NULL) {
                fprintf(out, " %16s", "null");
            } else {
                /* rows beyond the end of the column print as all ones */
                fprintf(out, " %016llx", row < b[col].vals->length ? b[col].vals->data[row] : -1);
            }
        }
        fputc('\n', out);

#if !defined(DEBUG_MERGE) || DEBUG_MERGE < 3
        /* abbreviate long tables: jump ahead to the final rows */
        if (length > 25 && line > DEBUG_CUTOFF && (length - line) > 2) {
            fputs(" ...\n", out);
            line = length - 3;
        }
#endif
    }
}
Esempio n. 11
0
/*
 * MINUS: produce the rows of a that have no equal row in b, where rows are
 * compared (via binding_row_compare()) on the columns bound in both tables.
 *
 * Parameters:
 *   q - query context, only passed through to binding_row_compare()
 *   a - left hand table; may be NULL
 *   b - right hand table (rows to remove); may be NULL
 *
 * Returns:
 *   NULL if a is NULL. Otherwise a table obtained from fs_binding_copy(a):
 *     - an unmodified copy when b is NULL,
 *     - a copy that still holds all of a's rows when a and b share no bound
 *       column (nothing can match, so nothing is removed),
 *     - otherwise a table rebuilt row by row from the surviving rows of a.
 *       Only columns with need_val set receive values; columns not bound in
 *       a are filled with FS_RID_NULL.
 *
 * Side effects on the inputs: the sort flags of a and b are rewritten, and
 * when the tables intersect both are sorted with fs_binding_sort() and their
 * column 0 vectors are reset to length 0 before returning.
 */
fs_binding *fs_binding_minus(fs_query *q, fs_binding *a, fs_binding *b)
{
    if (a == NULL) {
        return NULL;
    }
    if (b == NULL) {
        /* a - 0 = a */
        return fs_binding_copy(a);
    }

    fs_binding *c = fs_binding_copy(a);
    int inter = 0;      /* do the tables intersect */

    for (int i=0; a[i].name; i++) {
        a[i].sort = 0;
        b[i].sort = 0;
        c[i].sort = 0;
    }

    /* columns bound on both sides are the ones the rows are compared on */
    for (int i=1; a[i].name; i++) {
        if (a[i].bound && b[i].bound) {
            inter = 1;
            a[i].sort = 1;
            b[i].sort = 1;
#ifdef DEBUG_MERGE
            printf("joining on %s\n", a[i].name);
#endif
        }
    }

    /* a and b bound variables do not intersect, return c (copy of a).
       c's values and bound flags have deliberately not been touched yet, so
       it still holds every row of a. */
    if (!inter) {
#ifdef DEBUG_MERGE
        printf("remove nothing, result:\n");
        fs_binding_print(c, stdout);
#endif
        return c;
    }

    /* from here on c is rebuilt from scratch: empty every column and mark a
       column bound if either input binds it */
    for (int i=0; a[i].name; i++) {
        c[i].vals->length = 0;
        if (i > 0 && (a[i].bound || b[i].bound)) {
            c[i].bound = 1;
        }
    }

    int length_a = fs_binding_length(a);
    int length_b = fs_binding_length(b);

    /* sort the two sets of bindings so they can be merged linearly */
    fs_binding_sort(a);
    fs_binding_sort(b);

#ifdef DEBUG_MERGE
    printf("a: %d bindings\n", fs_binding_length(a));
    fs_binding_print(a, stdout);
    printf("b: %d bindings\n", fs_binding_length(b));
    fs_binding_print(b, stdout);
#endif

    int apos = 0;
    int bpos = 0;
    int cmp;
    while (apos < length_a) {
        cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b);
        if (cmp == -1 || cmp == -2) {
            /* A and B aren't compatible, keep A row */
            for (int col=0; a[col].name; col++) {
                if (!c[col].need_val) {
                    continue;
                } else if (a[col].bound) {
                    fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                } else {
                    fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                }
            }
            apos++;
        } else if (cmp == 0) {
            /* Both rows are equal (cmp == 0), skip A row in result */
#if DEBUG_MERGE > 1
            printf("[I] Ar=%d, Br=%d", apos, bpos);
#endif
            /* step over the whole run of rows equal to the current one on
               each side */
            int range_a = apos+1;
            int range_b = bpos+1;
            while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++;
            while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++;
            apos = range_a;
            bpos = range_b;
        } else if (cmp == +1 || cmp == +2) {
            /* A and B aren't compatible, B sorts lower, skip B or
               B row is NULL */
            bpos++;
        } else {
            /* Unexpected comparator result. Neither cursor moves in this
               branch, so continuing would repeat the same comparison
               forever; log it and stop merging. Rows of a not yet visited
               are left out of the result. */
            fs_error(LOG_ERR, "cmp=%d, value out of range", cmp);
            break;
        }
    }

    /* clear the _ord columns */
    a[0].vals->length = 0;
    b[0].vals->length = 0;

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(c));
    fs_binding_print(c, stdout);
#endif

    return c;
}
Esempio n. 12
0
static int update_op(struct update_context *ct)
{
    fs_rid_vector *vec[4];

    switch (ct->op->type) {
    case RASQAL_UPDATE_TYPE_UNKNOWN:
        add_message(ct, "Unknown update operation", 0);
        return 1;
    case RASQAL_UPDATE_TYPE_CLEAR:
        fs_clear(ct, (char *)raptor_uri_as_string(ct->op->graph_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_CREATE:
        return 0;
    case RASQAL_UPDATE_TYPE_DROP:
        fs_clear(ct, (char *)raptor_uri_as_string(ct->op->graph_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_LOAD:
        fs_load(ct, (char *)raptor_uri_as_string(ct->op->document_uri),
                    (char *)raptor_uri_as_string(ct->op->graph_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_UPDATE:
        break;
    }

    fs_hash_freshen();

    raptor_sequence *todel = NULL;
    raptor_sequence *toins = NULL;

    if (ct->op->where) {
        todel = raptor_new_sequence(NULL, NULL);
        toins = raptor_new_sequence(NULL, NULL);
        raptor_sequence *todel_p = raptor_new_sequence(NULL, NULL);
        raptor_sequence *toins_p = raptor_new_sequence(NULL, NULL);
        raptor_sequence *vars = raptor_new_sequence(NULL, NULL);

        fs_query *q = calloc(1, sizeof(fs_query));
        ct->q = q;
        q->qs = ct->qs;
        q->rq = ct->rq;
        q->flags = FS_BIND_DISTINCT;
        q->opt_level = 3;
        q->soft_limit = -1;
        q->segments = fsp_link_segments(ct->link);
        q->link = ct->link;
        q->bb[0] = fs_binding_new();
        q->bt = q->bb[0];
        /* add column to denote join ordering */
        fs_binding_add(q->bb[0], "_ord", FS_RID_NULL, 0);

        struct pattern_data pd = { .q = q, .vars = vars, .patterns = NULL, .fixed = NULL };

        if (ct->op->delete_templates) {
            pd.patterns = todel_p;
            pd.fixed = todel;

            for (int t=0; t<raptor_sequence_size(ct->op->delete_templates); t++) {
                rasqal_graph_pattern *gp = raptor_sequence_get_at(ct->op->delete_templates, t);
                assign_gp(gp, NULL, &pd);
            }
        }

        if (ct->op->insert_templates) {
            pd.patterns = toins_p;
            pd.fixed = toins;

            for (int t=0; t<raptor_sequence_size(ct->op->insert_templates); t++) {
                rasqal_graph_pattern *gp = raptor_sequence_get_at(ct->op->insert_templates, t);
                assign_gp(gp, NULL, &pd);
            }
        }

        q->num_vars = raptor_sequence_size(vars);

        for (int i=0; i < q->num_vars; i++) {
            rasqal_variable *v = raptor_sequence_get_at(vars, i);
            fs_binding *b = fs_binding_get(q->bb[0], (char *)v->name);
            if (b) {
                b->need_val = 1;
            } else {
                fs_binding_add(q->bb[0], (char *)v->name, FS_RID_NULL, 1);
            }
        }

        fs_query_process_pattern(q, ct->op->where, vars);

        q->length = fs_binding_length(q->bb[0]);

        for (int s=0; s<4; s++) {
            vec[s] = fs_rid_vector_new(0);
        }
        for (int t=0; t<raptor_sequence_size(todel_p); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(todel_p, t);
            for (int row=0; row < q->length; row++) {
                delete_rasqal_triple(ct, vec, triple, row);
                if (fs_rid_vector_length(vec[0]) > 0) {
                    fsp_delete_quads_all(ct->link, vec);
                }
            }
        }
        for (int s=0; s<4; s++) {
//fs_rid_vector_print(vec[s], 0, stdout);
            fs_rid_vector_free(vec[s]);
        }

        for (int t=0; t<raptor_sequence_size(toins_p); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(toins_p, t);
            for (int row=0; row < q->length; row++) {
                insert_rasqal_triple(ct, triple, row);
            }
        }

        /* must not free the rasqal_query */
        q->rq = NULL;
        fs_query_free(q);
        ct->q = NULL;
    } else {