/* UNION b onto a: on return a holds its own rows followed by the rows of b.
 *
 * The work is done in place on a; nothing is returned.  Columns are paired
 * by position starting at index 1 and processing stops as soon as either
 * table runs out of named columns.  When a column is bound on one side only,
 * the other side is marked bound and padded with FS_RID_NULL up to that
 * table's row count, so both halves line up before b's values are appended.
 * Note that this can modify b as well (bound flag and padding).
 *
 * a's _ord column (index 0) is emptied.  q is not used. */
void fs_binding_union(fs_query *q, fs_binding *a, fs_binding *b)
{
    const int rows_a = fs_binding_length(a);
    const int rows_b = fs_binding_length(b);

    a[0].vals->length = 0;

    int col = 1;
    while (a[col].name && b[col].name) {
        fs_binding *dst = &a[col];
        fs_binding *src = &b[col];

        /* pick the side (if any) that lacks this column */
        fs_binding *pad = NULL;
        int pad_to = 0;
        if (!dst->bound && src->bound) {
            pad = dst;
            pad_to = rows_a;
        } else if (dst->bound && !src->bound) {
            pad = src;
            pad_to = rows_b;
        }

        if (pad) {
            pad->bound = 1;
            while (pad->vals->length < pad_to) {
                fs_rid_vector_append(pad->vals, FS_RID_NULL);
            }
        }

        fs_rid_vector_append_vector(dst->vals, src->vals);
        col++;
    }
}
/* Return a new binding table holding the rows of b that satisfy the FILTER
 * expressions in constr.
 *
 * q      - query state; q->bt is pointed at b while the expressions are
 *          evaluated and restored before returning
 * block  - block number handed through to fs_expression_eval()
 * b      - input table, not modified
 * constr - sequence of rasqal_expression pointers; may be NULL, and
 *          individual entries may be NULL (optimised out)
 *
 * A row is kept only if the effective boolean value of every non-NULL
 * expression is true, and each input row is emitted at most once.  NULL
 * entries are ignored, so a sequence with nothing left to test keeps every
 * row, the same as passing constr == NULL.
 *
 * (Previously a row was appended once per passing expression, which
 * duplicated rows when several filters matched and kept rows that failed
 * some of them.)
 *
 * The result comes from fs_binding_copy(); only bound columns receive
 * values. */
fs_binding *fs_binding_apply_filters(fs_query *q, int block, fs_binding *b, raptor_sequence *constr)
{
    fs_binding *ret = fs_binding_copy(b);
    if (!constr) {
        /* if there are no constraints then we don't need to do anything */
        return ret;
    }

    for (int col=0; b[col].name; col++) {
        ret[col].vals->length = 0;
    }

    const int length = fs_binding_length(b);
    const int nconstr = raptor_sequence_size(constr);
    fs_binding *restore = q->bt;
    q->bt = b;

    /* TODO should prefetch lexical vals here */

    /* expressions that have been optimised out will be replaced with NULL,
     * so we have to be careful here */

    /* --------------------------- */
    /* PREFETCH should go here XXX */
    /* --------------------------- */

    for (int row=0; row<length; row++) {
        int keep = 1;

        /* stop at the first failing filter, the row is already rejected */
        for (int c=0; keep && c<nconstr; c++) {
            rasqal_expression *e = raptor_sequence_get_at(constr, c);
            if (!e) continue;

            fs_value v = fs_expression_eval(q, row, block, e);
#ifdef DEBUG_FILTER
            rasqal_expression_print(e, stdout);
            printf(" -> ");
            fs_value_print(v);
            printf("\n");
#endif
            if ((v.valid & fs_valid_bit(FS_V_TYPE_ERROR)) && v.lex) {
                q->warnings = g_slist_prepend(q->warnings, v.lex);
            }

            fs_value result = fn_ebv(v);
            /* an error or a false EBV rejects the row */
            if ((result.valid & fs_valid_bit(FS_V_TYPE_ERROR)) || !result.in) {
                keep = 0;
            }
        }

        if (!keep) {
            continue;
        }

        for (int col=0; b[col].name; col++) {
            if (b[col].bound) {
                fs_rid_vector_append(ret[col].vals, b[col].vals->data[row]);
            }
        }
    }

    q->bt = restore;

    return ret;
}
/* Drop rows of bi that compare equal to the row immediately before them.
 *
 * Only consecutive duplicates are removed, so the caller presumably passes
 * a table whose equal rows are already adjacent (TODO confirm).  The table
 * is rebuilt in place from a working copy obtained with
 * fs_binding_copy_and_clear(); the copy is freed before returning.  bi's
 * _ord column (index 0) is emptied. */
void fs_binding_uniq(fs_binding *bi)
{
    /* fewer than two rows cannot contain a duplicate, and the code below
     * relies on row 0 existing */
    if (fs_binding_length(bi) < 2) {
        return;
    }

    fs_binding *src = fs_binding_copy_and_clear(bi);
    bi[0].vals->length = 0;

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif

    int length = fs_binding_length(src);
    int outrow = 1;

    /* row 0 is always kept; while here, carry the bound flags over and make
     * every bound column take part in the row comparison */
    int column = 1;
    while (src[column].name) {
        fs_rid_vector_append(bi[column].vals, table_value(src, column, 0));
        bi[column].bound = src[column].bound;
        src[column].sort = src[column].bound;
        column++;
    }

    /* every later row is kept only if it differs from its predecessor */
    int row = 1;
    while (row < length) {
        if (binding_row_compare(NULL, src, src, row, row-1, length, length) != 0) {
            for (column = 1; src[column].name; column++) {
                fs_rid_vector_append(bi[column].vals, table_value(src, column, row));
            }
            outrow++;
        }
        row++;
    }

#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("uniq took %fs (%d->%d rows)\n", now-then, length, outrow);
    fs_binding_print(bi, stdout);
#endif

    fs_binding_free(src);
}
/* Returns 1 if literal l is a variable that counts as bound in table b,
 * 0 otherwise.
 *
 * A variable counts as bound when the table has no rows at all (nothing is
 * bound yet), or when its column exists and has bound == 1.  A NULL literal
 * and every non-variable literal type (URIs, strings, numbers, dates,
 * blanks, patterns, qnames, ...) yield 0. */
int fs_opt_is_bound(fs_binding *b, rasqal_literal *l)
{
    if (!l || l->type != RASQAL_LITERAL_VARIABLE) {
        return 0;
    }

    if (fs_binding_length(b) == 0) {
        return 1;
    }

    fs_binding *bv = fs_binding_get(b, l->value.variable);

    return (bv && bv->bound == 1) ? 1 : 0;
}
/* Sort a binding table on the columns that have their sort flag set.
 *
 * The value vectors themselves are not reordered: the _ord column (b[0]) is
 * filled with the row numbers 0..n-1 and that index vector is sorted with
 * fs_qsort_r(), using qsort_r_cmp with the table as context.
 *
 * Before sorting, every column shorter than the table length is padded with
 * FS_RID_NULL.  If no column is flagged for sorting a warning is logged and
 * the _ord column is left alone. */
void fs_binding_sort(fs_binding *b)
{
    const int length = fs_binding_length(b);
    int sort_cols = 0;

    for (fs_binding *col = b; col->name; col++) {
        if (col->sort) {
            sort_cols++;
        }
        if (col->vals->length < length) {
            int filled = col->vals->length;
            while (filled < length) {
                fs_rid_vector_append(col->vals, FS_RID_NULL);
                filled++;
            }
        }
    }

    if (sort_cols == 0) {
        fs_error(LOG_WARNING, "fs_binding_sort() called with no sort "
                 "columns set, ignoring");
        return;
    }

    /* fill out the _ord column with integers in [0,n) */
    fs_rid_vector *ord = b[0].vals;
    ord->length = 0;
    int row = 0;
    while (row < length) {
        fs_rid_vector_append(ord, row);
        row++;
    }

    /* zero or one row is already in order */
    if (length <= 1) {
        return;
    }

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif
    /* ctxt could include other stuff for optimisations */
    struct sort_context ctxt = { b };
    fs_qsort_r(ord->data, length, sizeof(fs_rid), qsort_r_cmp, &ctxt);
#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("sort took %f seconds\n", now - then);
#endif
}
/* Execute the single update operation described by uc->op.
 *
 * Returns 1 when the operation is rejected (unknown operation type, or the
 * "DELETE WHERE { x }" shorthand, which is not supported) and 0 in every
 * other case.  The results of fs_clear/fs_load/fs_add/fs_move/fs_copy and of
 * the individual delete/insert helpers are not inspected here.
 *
 * For a plain UPDATE the work happens in up to three stages:
 *   1. with a WHERE clause (Rasqal >= 0.9.23 only): the templates are split
 *      into constant triples and triples containing variables, the WHERE
 *      pattern is evaluated in a temporary fs_query, and the variable
 *      templates are instantiated once per result row;
 *   2. constant delete templates are applied;
 *   3. constant insert templates are applied.
 * fs_hash_freshen() is called before and after the whole sequence. */
static int update_op(struct update_context *uc)
{
    /* one rid vector per quad slot, filled by delete_rasqal_triple() */
    fs_rid_vector *vec[4];

    /* everything except a plain UPDATE is dispatched and finished here */
    switch (uc->op->type) {
    case RASQAL_UPDATE_TYPE_UNKNOWN:
        add_message(uc, "Unknown update operation", 0);
        return 1;
    case RASQAL_UPDATE_TYPE_CLEAR:
        fs_clear(uc, graph_arg(uc->op->graph_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_CREATE:
        /* nothing to do for CREATE */
        return 0;
    case RASQAL_UPDATE_TYPE_DROP:
        /* DROP is handled exactly like CLEAR */
        fs_clear(uc, graph_arg(uc->op->graph_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_LOAD:
        fs_load(uc, graph_arg(uc->op->document_uri), graph_arg(uc->op->graph_uri));
        return 0;
#if RASQAL_VERSION >= 924
    case RASQAL_UPDATE_TYPE_ADD:
        fs_add(uc, graph_arg(uc->op->graph_uri), graph_arg(uc->op->document_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_MOVE:
        fs_move(uc, graph_arg(uc->op->graph_uri), graph_arg(uc->op->document_uri));
        return 0;
    case RASQAL_UPDATE_TYPE_COPY:
        fs_copy(uc, graph_arg(uc->op->graph_uri), graph_arg(uc->op->document_uri));
        return 0;
#endif
    case RASQAL_UPDATE_TYPE_UPDATE:
        break;
    }

    fs_hash_freshen();

    /* constant (variable free) triples to delete / insert */
    raptor_sequence *todel = NULL;
    raptor_sequence *toins = NULL;

    if (uc->op->delete_templates && !uc->op->where) {
        int where = 0;

        /* check to see if it's a DELETE WHERE { } */
        for (int t=0; t<raptor_sequence_size(uc->op->delete_templates); t++) {
            rasqal_triple *tr = raptor_sequence_get_at(uc->op->delete_templates, t);
            if (any_vars(tr)) {
                where = 1;
                break;
            }
        }
        if (where) {
            fs_error(LOG_ERR, "DELETE WHERE { x } not yet supported");
            add_message(uc, "DELETE WHERE { x } not yet supported, use DELETE { x } WHERE { x }", 0);
            return 1;
        }
    }

#if RASQAL_VERSION >= 923
    if (uc->op->where) {
        /* NOTE(review): the five sequences created here are not released
         * anywhere in this function - confirm whether fs_query_free() or the
         * caller takes care of them, otherwise they leak per operation. */
        todel = raptor_new_sequence(NULL, NULL);
        toins = raptor_new_sequence(NULL, NULL);
        /* templates that contain variables ("patterns") */
        raptor_sequence *todel_p = raptor_new_sequence(NULL, NULL);
        raptor_sequence *toins_p = raptor_new_sequence(NULL, NULL);
        /* variables referenced by the templates */
        raptor_sequence *vars = raptor_new_sequence(NULL, NULL);

        /* temporary query used to evaluate the WHERE pattern */
        /* NOTE(review): the calloc() result is used without a NULL check */
        fs_query *q = calloc(1, sizeof(fs_query));
        uc->q = q;
        q->qs = uc->qs;
        q->rq = uc->rq;
        q->flags = FS_BIND_DISTINCT;
#ifdef DEBUG_MERGE
        q->flags |= FS_QUERY_CONSOLE_OUTPUT;
#endif
        q->boolean = 1;
        q->opt_level = 3;
        q->soft_limit = -1;
        q->segments = fsp_link_segments(uc->link);
        q->link = uc->link;
        q->bb[0] = fs_binding_new();
        q->bt = q->bb[0];
        /* hashtable to hold runtime created resources */
        q->tmp_resources = g_hash_table_new_full(fs_rid_hash, fs_rid_equal, g_free, fs_free_cached_resource);

        /* add column to denote join ordering */
        fs_binding_create(q->bb[0], "_ord", FS_RID_NULL, 0);

        /* split the delete templates into constant and variable triples */
        if (uc->op->delete_templates) {
            for (int t=0; t<raptor_sequence_size(uc->op->delete_templates); t++) {
                rasqal_triple *tr = raptor_sequence_get_at(uc->op->delete_templates, t);
                if (any_vars(tr)) {
                    fs_check_cons_slot(q, vars, tr->subject);
                    fs_check_cons_slot(q, vars, tr->predicate);
                    fs_check_cons_slot(q, vars, tr->object);
                    raptor_sequence_push(todel_p, tr);
                } else {
                    raptor_sequence_push(todel, tr);
                }
            }
        }

        /* same split for the insert templates */
        if (uc->op->insert_templates) {
            for (int t=0; t<raptor_sequence_size(uc->op->insert_templates); t++) {
                rasqal_triple *tr = raptor_sequence_get_at(uc->op->insert_templates, t);
                if (any_vars(tr)) {
                    fs_check_cons_slot(q, vars, tr->subject);
                    fs_check_cons_slot(q, vars, tr->predicate);
                    fs_check_cons_slot(q, vars, tr->object);
                    raptor_sequence_push(toins_p, tr);
                } else {
                    raptor_sequence_push(toins, tr);
                }
            }
        }

        /* one binding column per template variable */
        q->num_vars = raptor_sequence_size(vars);
        for (int i=0; i < q->num_vars; i++) {
            rasqal_variable *v = raptor_sequence_get_at(vars, i);
            fs_binding_add(q->bb[0], v, FS_RID_NULL, 1);
        }

        /* perform the WHERE match */
        fs_query_process_pattern(q, uc->op->where, vars);

        q->length = fs_binding_length(q->bb[0]);

        for (int s=0; s<4; s++) {
            vec[s] = fs_rid_vector_new(0);
        }

        /* instantiate each variable delete template for every result row */
        for (int t=0; t<raptor_sequence_size(todel_p); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(todel_p, t);
            for (int row=0; row < q->length; row++) {
                delete_rasqal_triple(uc, vec, triple, row);
            }
            /* flush once more than 1000 quads are queued */
            /* NOTE(review): vec is not emptied here after the flush - confirm
             * that fsp_delete_quads_all() resets it, otherwise the same quads
             * are sent again on later flushes. */
            if (fs_rid_vector_length(vec[0]) > 1000) {
                fsp_delete_quads_all(uc->link, vec);
            }
        }
        /* flush whatever is still queued */
        if (fs_rid_vector_length(vec[0]) > 0) {
            fsp_delete_quads_all(uc->link, vec);
        }

        for (int s=0; s<4; s++) {
            //fs_rid_vector_print(vec[s], 0, stdout);
            fs_rid_vector_free(vec[s]);
            vec[s] = NULL;
        }

        /* instantiate each variable insert template for every result row */
        for (int t=0; t<raptor_sequence_size(toins_p); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(toins_p, t);
            for (int row=0; row < q->length; row++) {
                insert_rasqal_triple(uc, triple, row);
            }
        }

        /* must not free the rasqal_query */
        q->rq = NULL;
        fs_query_free(q);
        uc->q = NULL;
    } else {
        /* no WHERE clause: the templates are used as they are */
        todel = uc->op->delete_templates;
        toins = uc->op->insert_templates;
    }
#else
    /* NOTE(review): the error is reported but execution continues and the
     * function still returns 0; todel and toins stay NULL on this path, so
     * nothing is deleted or inserted. */
    if (uc->op->where) {
        fs_error(LOG_ERR, "DELETE/INSERT WHERE requires Rasqal 0.9.23 or newer");
        add_message(uc, "DELETE/INSERT WHERE requires Rasqal 0.9.23 or newer", 0);
    }
#endif

    /* delete constant triples */
    if (todel) {
        for (int s=0; s<4; s++) {
            vec[s] = fs_rid_vector_new(0);
        }
        for (int t=0; t<raptor_sequence_size(todel); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(todel, t);
            /* triples with variables were already handled above (or cannot
             * be handled without a WHERE clause) */
            if (any_vars(triple)) {
                continue;
            }
            delete_rasqal_triple(uc, vec, triple, 0);
        }
        if (fs_rid_vector_length(vec[0]) > 0) {
            fsp_delete_quads_all(uc->link, vec);
        }
        for (int s=0; s<4; s++) {
            fs_rid_vector_free(vec[s]);
            vec[s] = NULL;
        }
    }

    /* insert constant triples */
    if (toins) {
        for (int t=0; t<raptor_sequence_size(toins); t++) {
            rasqal_triple *triple = raptor_sequence_get_at(toins, t);
            if (any_vars(triple)) {
                continue;
            }
            insert_rasqal_triple(uc, triple, 0);
        }
    }

    fs_hash_freshen();

    return 0;
}
/* Multiply a cost estimate by (rows * 100), saturating at the limits of int
 * instead of overflowing (signed overflow is undefined behaviour). */
static int fs_bind_freq_scale(int est, int rows)
{
    const long long scaled = (long long)est * (long long)rows;

    if (scaled > INT_MAX / 100) {
        return INT_MAX;
    }
    if (scaled < INT_MIN / 100) {
        return INT_MIN;
    }

    return (int)(scaled * 100);
}

/* Estimate how expensive it is to bind triple pattern t in the given block.
 *
 * qs    - query state; qs->freq_s / qs->freq_o are the optional subject and
 *         object frequency tables
 * q     - the query; q->bb[block] is the binding table consulted
 * block - block number
 * t     - the triple pattern
 *
 * Returns an int score (presumably lower scores are bound first - the
 * ordering policy lives in the caller):
 *   - INT_MAX when none of subject, predicate, object and origin is
 *     constant;
 *   - INT_MAX - 100 when neither subject nor object is constant;
 *   - a calc_freq() lookup when frequency data is available and the subject
 *     (or object) has exactly one value; object based lookups get an extra
 *     q->segments * 50 added;
 *   - without frequency data, the product of the candidate value counts,
 *     scaled by table length * 100 when nothing in the pattern is bound;
 *   - 100 when no rule applies.
 *
 * Fixes over the previous version: the object-only lookup now uses
 * qs->freq_o (it tested freq_o but passed freq_s to calc_freq()), and the
 * scaling step saturates instead of overflowing. */
int fs_bind_freq(fs_query_state *qs, fs_query *q, int block, rasqal_triple *t)
{
    int ret = 100;
#if DEBUG_OPTIMISER
    char dir = 'X';
#endif

    if (!fs_opt_is_const(q->bb[block], t->subject) &&
        !fs_opt_is_const(q->bb[block], t->predicate) &&
        !fs_opt_is_const(q->bb[block], t->object) &&
        !fs_opt_is_const(q->bb[block], t->origin)) {
#if DEBUG_OPTIMISER
        dir = '?';
#endif
        ret = INT_MAX;
    } else if (!fs_opt_is_const(q->bb[block], t->subject) &&
               !fs_opt_is_const(q->bb[block], t->object)) {
#if DEBUG_OPTIMISER
        dir = '?';
#endif
        ret = INT_MAX - 100;
    } else if (qs->freq_s &&
               fs_opt_num_vals(q->bb[block], t->subject) == 1 &&
               fs_opt_num_vals(q->bb[block], t->predicate) == 1) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = calc_freq(q, block, qs->freq_s, t->subject, t->predicate);
    } else if (qs->freq_o &&
               fs_opt_num_vals(q->bb[block], t->object) == 1 &&
               fs_opt_num_vals(q->bb[block], t->predicate) == 1) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        ret = calc_freq(q, block, qs->freq_o, t->object, t->predicate) +
              q->segments * 50;
    } else if (qs->freq_s &&
               fs_opt_num_vals(q->bb[block], t->subject) == 1) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = calc_freq(q, block, qs->freq_s, t->subject, NULL);
    } else if (qs->freq_o &&
               fs_opt_num_vals(q->bb[block], t->object) == 1) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        /* object lookups must use the object frequency table */
        ret = calc_freq(q, block, qs->freq_o, t->object, NULL) +
              q->segments * 50;
    /* clauses for if we have no freq data */
    } else if (fs_opt_num_vals(q->bb[block], t->subject) < 1000000 &&
               fs_opt_num_vals(q->bb[block], t->predicate) < 100 &&
               fs_opt_num_vals(q->bb[block], t->object) == INT_MAX) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = fs_opt_num_vals(q->bb[block], t->subject) *
              fs_opt_num_vals(q->bb[block], t->predicate);
        if (!fs_opt_is_bound(q->bb[block], t->subject) &&
            !fs_opt_is_bound(q->bb[block], t->predicate) &&
            !fs_opt_is_bound(q->bb[block], t->object)) {
            ret = fs_bind_freq_scale(ret, fs_binding_length(q->bb[block]));
        }
    } else if (fs_opt_num_vals(q->bb[block], t->object) < 1000000 &&
               fs_opt_num_vals(q->bb[block], t->predicate) < 100 &&
               fs_opt_num_vals(q->bb[block], t->subject) == INT_MAX) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        ret = fs_opt_num_vals(q->bb[block], t->predicate) *
              fs_opt_num_vals(q->bb[block], t->object);
        if (!fs_opt_is_bound(q->bb[block], t->subject) &&
            !fs_opt_is_bound(q->bb[block], t->predicate) &&
            !fs_opt_is_bound(q->bb[block], t->object)) {
            ret = fs_bind_freq_scale(ret, fs_binding_length(q->bb[block]));
        }
    }

#if DEBUG_OPTIMISER
    if (q->flags & FS_QUERY_EXPLAIN) {
        printf("freq(%c, ", dir);
        rasqal_triple_print(t, stdout);
        printf(") = %d\n", ret);
    }
#endif

    return ret;
}
fs_binding *fs_binding_join(fs_query *q, fs_binding *a, fs_binding *b, fs_join_type join) { if (a == NULL) { return fs_binding_copy(b); } if (b == NULL) { return fs_binding_copy(a); } fs_binding *c = fs_binding_copy(a); int inter = 0; /* do the tables intersect */ for (int i=0; a[i].name; i++) { a[i].sort = 0; b[i].sort = 0; c[i].sort = 0; c[i].vals->length = 0; } int bound_a = 0; int bound_b = 0; for (int i=1; a[i].name; i++) { if (a[i].bound) bound_a++; if (b[i].bound) bound_b++; if (a[i].bound || b[i].bound) { c[i].bound = 1; } if (a[i].bound && b[i].bound) { inter = 1; a[i].sort = 1; b[i].sort = 1; #ifdef DEBUG_MERGE printf("joining on %s\n", a[i].name); #endif } } /* a and b bound variables do not intersect, we can just dump results */ if (!inter) { int length_a = fs_binding_length(a); int length_b = fs_binding_length(b); for (int i=1; a[i].name; i++) { if (!a[i].bound) { for (int j=0; j<length_a; j++) { fs_rid_vector_append(c[i].vals, FS_RID_NULL); } } else { fs_rid_vector_append_vector(c[i].vals, a[i].vals); } if (!b[i].bound) { for (int j=0; j<length_b; j++) { fs_rid_vector_append(c[i].vals, FS_RID_NULL); } } else { fs_rid_vector_append_vector(c[i].vals, b[i].vals); } } #ifdef DEBUG_MERGE printf("append all, result:\n"); fs_binding_print(c, stdout); #endif return c; } int length_a = fs_binding_length(a); int length_b = fs_binding_length(b); /* sort the two sets of bindings so they can be merged linearly */ fs_binding_sort(a); fs_binding_sort(b); #ifdef DEBUG_MERGE printf("a: %d bindings\n", fs_binding_length(a)); fs_binding_print(a, stdout); printf("b: %d bindings\n", fs_binding_length(b)); fs_binding_print(b, stdout); #endif /* If were running in restricted mode, truncate the binding tables */ if (q->flags & FS_QUERY_RESTRICTED) { int restricted = 0; fs_binding_truncate(a, q->soft_limit); if (length_a > fs_binding_length(a)) { length_a = fs_binding_length(a); restricted = 1; } fs_binding_truncate(b, q->soft_limit); if (length_b > fs_binding_length(b)) { 
length_b = fs_binding_length(b); restricted = 1; } if (restricted) { char *msg = "some results have been dropped to prevent overunning effort allocation"; q->warnings = g_slist_prepend(q->warnings, msg); } } int apos = 0; int bpos = 0; int cmp; while (apos < length_a) { if (join == FS_INNER && bpos >= length_b) break; cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b); if (cmp == -1) { /* A and B aren't compatible, A sorts lower, skip A or left join */ #if DEBUG_MERGE > 1 printf("[L] Ar=%d, Br=%d", apos, bpos); #endif if (join == FS_LEFT) { for (int col=0; a[col].name; col++) { if (!c[col].need_val) { continue; } else if (a[col].bound) { #if DEBUG_MERGE > 1 printf(" %s=%016llx", c[col].name, table_value(a, col, apos)); #endif fs_rid_vector_append(c[col].vals, table_value(a, col, apos)); } else { #if DEBUG_MERGE > 1 printf(" %s=null", c[col].name); #endif fs_rid_vector_append(c[col].vals, FS_RID_NULL); } } } apos++; } else if (cmp == 0 || cmp == -2 || cmp == 2) { /* Both rows are equal (cmp == 0), or one row is null (cmp == -2, 2) */ /* Both rows match, find out what combinations bind and produce them */ #if DEBUG_MERGE > 1 printf("[I] Ar=%d, Br=%d", apos, bpos); #endif int range_a = apos+1; int range_b = bpos+1; while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++; while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++; int start_a = apos; int start_b = bpos; for (apos = start_a; apos<range_a; apos++) { for (bpos = start_b; bpos<range_b; bpos++) { for (int col=0; a[col].name; col++) { if (!c[col].need_val) { continue; } else if (!a[col].bound && !b[col].bound) { #if DEBUG_MERGE > 1 printf(" %s=null", c[col].name); #endif fs_rid_vector_append(c[col].vals, FS_RID_NULL); } else if (a[col].bound) { /* if were left joining and A is NULL, we want the * value from B */ if (join == FS_LEFT && table_value(a, col, apos) == FS_RID_NULL && b[col].bound) { #if DEBUG_MERGE > 1 printf(" %s=%016llx", 
c[col].name, table_value(b, col, bpos)); #endif fs_rid_vector_append(c[col].vals, table_value(b, col, bpos)); } else { #if DEBUG_MERGE > 1 printf(" %s=%016llx", c[col].name, table_value(a, col, apos)); #endif fs_rid_vector_append(c[col].vals, table_value(a, col, apos)); } } else { #if DEBUG_MERGE > 1 printf(" %s=%016llx", c[col].name, table_value(b, col, bpos)); #endif fs_rid_vector_append(c[col].vals, table_value(b, col, bpos)); } } } } /* this is actually unneccesary because the for loop will do the * same thing, but it's clearer */ apos = range_a; bpos = range_b; } else if (cmp == +1) { /* A and B aren't compatible, B sorts lower, skip B */ bpos++; } else { fs_error(LOG_ERR, "cmp=%d, value out of range", cmp); } #if DEBUG_MERGE > 1 printf("\n"); #endif } /* clear the _ord columns */ a[0].vals->length = 0; b[0].vals->length = 0; #ifdef DEBUG_MERGE printf("result: %d bindings\n", fs_binding_length(c)); fs_binding_print(c, stdout); #endif return c; }
/* return to = from [X] to, this is used to perform joins inside blocks, it
 * saves allocations by doing most operations inplace, unlike fs_binding_join
 *
 * q     - query state; q->flags / q->soft_limit control restricted mode and
 *         q->warnings receives a message when the merge is cut short
 * block - not used in this function
 * from  - table to merge in; its sort flags and _ord column are rewritten
 *         and it may be sorted or truncated
 * to    - table that receives the result, modified in place
 *
 * Columns are paired by index.  If the tables share no bound column and
 * from has no rows, the columns are just appended and the function returns
 * early.  Otherwise both tables are sorted on the shared columns (when there
 * are any) and merged linearly: the first matching from-row overwrites
 * values in the matching to-rows, further matching from-rows append new
 * rows.  Columns that were unbound in to but bound in from are remembered,
 * and rows in which such a column is still FS_RID_NULL after the merge are
 * removed.
 *
 * Fix over the previous version: the list of replaced columns is now
 * released (it was walked to its end and then g_list_free() was handed
 * NULL, leaking the list). */
void fs_binding_merge(fs_query *q, int block, fs_binding *from, fs_binding *to)
{
    fs_binding *inter_f = NULL; /* the intersecting column */
    fs_binding *inter_t = NULL; /* the intersecting column */

    for (int i=0; from[i].name; i++) {
        from[i].sort = 0;
        to[i].sort = 0;
    }

    /* columns bound on both sides form the sort/join key; the last such
     * column is remembered in inter_f / inter_t */
    for (int i=1; from[i].name; i++) {
        if (!from[i].bound || !to[i].bound) continue;

        inter_f = from+i;
        inter_t = to+i;
        from[i].sort = 1;
        to[i].sort = 1;
#ifdef DEBUG_MERGE
        printf("@@ join on %s\n", to[i].name);
#endif
    }

    /* from and to bound variables do not intersect, we can just dump results,
       under some circumstances we need to do a combinatorial explosion */
    if (!inter_f && (fs_binding_length(from) == 0)) {
        const int length_f = fs_binding_length(from);
        const int length_t = fs_binding_length(to);
        for (int i=1; from[i].name; i++) {
            if (to[i].bound && !from[i].bound) {
                if (from[i].vals) {
                    fs_rid_vector_free(from[i].vals);
                }
                from[i].vals = fs_rid_vector_new(length_f);
                for (int d=0; d<length_f; d++) {
                    from[i].vals->data[d] = FS_RID_NULL;
                }
                from[i].bound = 1;
            }
            if (!from[i].bound) continue;
            if (!to[i].bound) {
                if (to[i].vals) {
                    fs_rid_vector_free(to[i].vals);
                }
                to[i].vals = fs_rid_vector_new(length_t);
                for (int d=0; d<length_t; d++) {
                    to[i].vals->data[d] = FS_RID_NULL;
                }
            }
            fs_rid_vector_append_vector(to[i].vals, from[i].vals);
            to[i].bound = 1;
        }
#ifdef DEBUG_MERGE
        printf("append all, result:\n");
        fs_binding_print(to, stdout);
#endif

        return;
    }

    /* If we're running in restricted mode, truncate the binding tables */
    if (q->flags & FS_QUERY_RESTRICTED) {
        fs_binding_truncate(from, q->soft_limit);
        fs_binding_truncate(to, q->soft_limit);
    }

    int length_t = fs_binding_length(to);
    int length_f = fs_binding_length(from);

    /* ms8: this list keeps track of the vars to replace */
    GList *rep_list = NULL;
    for (int i=1; to[i].name; i++) {
        if (to+i == inter_t || to[i].used || to[i].bound) {
            /* do nothing */
#if DEBUG_MERGE > 1
            printf("@@ preserve %s\n", to[i].name);
#endif
        } else if (from[i].bound && !to[i].bound) {
#if DEBUG_MERGE > 1
            printf("@@ replace %s\n", from[i].name);
#endif
            /* give the column a NULL filled vector of the right length, the
             * merge below writes the real values into it */
            to[i].bound = 1;
            if (to[i].vals) {
                if (to[i].vals->length != length_t) {
                    fs_rid_vector_free(to[i].vals);
                    to[i].vals = fs_rid_vector_new(length_t);
                }
            } else {
                to[i].vals = fs_rid_vector_new(length_t);
            }
            for (int d=0; d<length_t; d++) {
                to[i].vals->data[d] = FS_RID_NULL;
            }
            rep_list = g_list_append(rep_list, GINT_TO_POINTER(i));
        }
    }

    /* sort the two sets of bindings so they can be merged linearly */
    if (inter_f) {
        fs_binding_sort(from);
        fs_binding_sort(to);
    } else {
        /* make sure the tables are not marked sorted */
        from[0].vals->length = 0;
        to[0].vals->length = 0;
    }

#ifdef DEBUG_MERGE
    printf("old: %d bindings\n", fs_binding_length(from));
    fs_binding_print(from, stdout);
    printf("new: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif

    int fpos = 0;
    int tpos = 0;
    while (fpos < length_f || tpos < length_t) {
        if (q->flags & FS_QUERY_RESTRICTED &&
            fs_binding_length(to) >= q->soft_limit) {
            char *msg = g_strdup("some results have been dropped to prevent overunning time allocation");
            q->warnings = g_slist_prepend(q->warnings, msg);
            break;
        }
        int cmp;
        cmp = binding_row_compare(q, from, to, fpos, tpos, length_f, length_t);
        if (cmp == 0) {
            /* both rows match */
            int fp, tp = tpos;
            for (fp = fpos; binding_row_compare(q, from, to, fp, tpos, length_f, length_t) == 0; fp++) {
#if DEBUG_MERGE > 1
                if (fp == DEBUG_CUTOFF) {
                    printf("...\n");
                }
#endif
                for (tp = tpos; 1; tp++) {
                    if (binding_row_compare(q, from, to, fp, tp, length_f, length_t) == 0) {
#if DEBUG_MERGE > 1
                        if (fp < DEBUG_CUTOFF) {
                            printf("STEP %d, %d ", fp-fpos, tp-tpos);
                        }
#endif
                        if (fp == fpos) {
                            /* first matching from-row: write into the
                             * existing to-row */
#if DEBUG_MERGE > 1
                            if (fp < DEBUG_CUTOFF) {
                                if (inter_f) {
                                    printf("REPL %llx\n", inter_f->vals->data[fp]);
                                } else {
                                    printf("REPL ???\n");
                                }
                            }
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && table_value(from, c, fp) == FS_RID_NULL) {
                                    continue;
                                }
                                if (from[c].bound && fp < from[c].vals->length) {
                                    /* map through the _ord column when the
                                     * table is sorted */
                                    long wrow = to[0].vals->length ? to[0].vals->data[tp] : tp;
                                    to[c].vals->data[wrow] = table_value(from, c, fp);
                                    if (to[c].vals->length <= tp) {
                                        to[c].vals->length = tp+1;
                                    }
                                }
                            }
                        } else {
                            /* further matching from-rows: append a new row */
#if DEBUG_MERGE > 1
                            if (fp < DEBUG_CUTOFF) {
                                printf("ADD\n");
                            }
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && fp < from[c].vals->length) {
                                    fs_rid_vector_append(to[c].vals, table_value(from, c, fp));
                                } else {
                                    fs_rid_vector_append(to[c].vals, table_value(to, c, tp));
                                }
                            }
                        }
                    } else {
                        break;
                    }
                }
            }
            tpos = tp;
            fpos = fp;
        } else if (cmp <= -1) {
            fpos++;
        } else if (cmp >= 1) {
            tpos++;
        } else {
            fs_error(LOG_CRIT, "unknown compare state %d in binding", cmp);
        }
    }

    /* clear the _ord columns */
    from[0].vals->length = 0;
    to[0].vals->length = 0;

    /* ms8: INIT code to clean up rows that were not replaced */
    if (rep_list) {
        /* rows whose bit is cleared below are dropped, rows whose bit is
         * still set are kept */
        unsigned char *to_del = fs_new_bit_array(length_t);
        int to_del_count = 0;

        /* walk with a cursor so rep_list keeps pointing at the list head */
        for (GList *it = rep_list; it; it = g_list_next(it)) {
            int col_r = GPOINTER_TO_INT(it->data);
            for (int d=0; d<length_t; d++) {
                if (to[col_r].vals->data[d] == FS_RID_NULL) {
                    fs_bit_array_set(to_del, d, 0);
                    to_del_count++;
                }
            }
        }
        g_list_free(rep_list);
        rep_list = NULL;

        if (to_del_count) {
            int vars = 0;
            for (int i=1; to[i].name; i++) vars++;
            fs_rid_vector **clean = calloc(vars, sizeof(fs_rid_vector *));
            for (int i=0; i<vars; i++) clean[i] = fs_rid_vector_new(0);
            /* NOTE(review): only rows [0, length_t) are examined here, rows
             * appended during the merge above are not carried over into the
             * compacted vectors - confirm this is intended */
            for (int d=0; d<length_t; d++) {
                if (fs_bit_array_get(to_del, d)) {
                    for (int i=0; i<vars; i++) {
                        fs_rid_vector_append(clean[i], to[i+1].vals->data[d]);
                    }
                }
            }
            /* hand the compacted buffers over to the columns of to */
            for (int i=1; i<=vars; i++) {
                free(to[i].vals->data);
                to[i].vals->data = clean[i-1]->data;
                to[i].vals->length = clean[i-1]->length;
                to[i].vals->size = clean[i-1]->size;
                free(clean[i-1]);
            }
            free(clean);
        }
        fs_bit_array_destroy(to_del);
    }
    /* ms8: END code to clean up rows that were not replaced */

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif
}
/* Write a human readable dump of binding table b to out.
 *
 * Two header lines are printed (column names, then per-column flags:
 * p = proj, u = used, n = need_val, b = bound, followed by the appears and
 * depends counters), then one line per row.  When the _ord column (b[0])
 * is populated, an extra "order" column is shown and values are looked up
 * through it.  Unbound columns and FS_RID_NULL values print as "null".
 *
 * Unless DEBUG_MERGE is defined as 3 or more, the middle of tables longer
 * than 25 rows is elided after DEBUG_CUTOFF rows. */
void fs_binding_print(fs_binding *b, FILE *out)
{
    const int length = fs_binding_length(b);
    const int has_order = b[0].vals->length ? 1 : 0;

    /* header line 1: column names */
    fputs(" ", out);
    if (has_order) {
        fputs(" ", out);
    }
    for (fs_binding *col = b + 1; col->name; col++) {
        fprintf(out, col->bound ? " %16.16s" : " %12.12s", col->name);
    }
    fputs("\n", out);

    /* header line 2: flags and counters */
    fputs(" row", out);
    if (has_order) {
        fputs(" order", out);
    }
    for (fs_binding *col = b + 1; col->name; col++) {
        const char proj = col->proj ? 'p' : '-';
        const char used = col->used ? 'u' : '-';
        const char need = col->need_val ? 'n' : '-';

        if (col->bound) {
            fprintf(out, " %c%c%c%c A%02d D%02d", proj, used, need,
                    col->bound ? 'b' : '-', col->appears, col->depends);
        } else {
            fprintf(out, " %c%c%c A%02d D%02d", proj, used, need,
                    col->appears, col->depends);
        }
    }
    fputs("\n", out);

    /* body: lr is the display row, r the row actually read */
    long int lr = 0;
    while (lr < length) {
        long int r = lr;
        if (has_order) {
            r = b[0].vals->data[lr];
        }

        fprintf(out, "%4ld", lr);
        if (has_order) {
            fprintf(out, " %5ld", r);
        }

        for (int c = 1; b[c].name; c++) {
            if (!b[c].bound) {
                fprintf(out, "%13s", "null");
                continue;
            }
            if (r < b[c].vals->length && b[c].vals->data[r] == FS_RID_NULL) {
                fprintf(out, " %16s", "null");
            } else {
                fprintf(out, " %016llx", r < b[c].vals->length ? b[c].vals->data[r] : -1);
            }
        }
        fputs("\n", out);

#if !defined(DEBUG_MERGE) || DEBUG_MERGE < 3
        /* skip ahead so that only the last rows are still printed */
        if (length > 25 && lr > DEBUG_CUTOFF && (length - lr) > 2) {
            fputs(" ...\n", out);
            lr = length - 3;
        }
#endif
        lr++;
    }
}
/* Compute a MINUS b: the rows of a that have no matching row in b.
 *
 * q    - query state, handed through to binding_row_compare()
 * a, b - input tables with the same column layout (columns are paired by
 *        index).  Their sort flags are rewritten and, when they share a
 *        bound column, both are sorted and their _ord columns emptied.
 *
 * Returns NULL if a is NULL, otherwise a newly allocated table derived from
 * fs_binding_copy(a):
 *   - b == NULL, or a and b share no bound column: nothing can be removed,
 *     the result is a copy of a with all of its rows;
 *   - otherwise both tables are sorted on the shared columns and walked in
 *     parallel; rows of a equal to a row of b are skipped, every other row
 *     is copied (only columns with need_val set receive values).
 *
 * Fixes over the previous version: the value vectors of the copy were
 * emptied before the "no shared column" early return, so that case returned
 * an empty table instead of a copy of a; and an unexpected compare result
 * now ends the loop instead of spinning forever. */
fs_binding *fs_binding_minus(fs_query *q, fs_binding *a, fs_binding *b)
{
    if (a == NULL) {
        return NULL;
    }
    if (b == NULL) {
        /* a - 0 = a */
        return fs_binding_copy(a);
    }

    fs_binding *c = fs_binding_copy(a);
    int inter = 0;      /* do the tables intersect */

    for (int i=0; a[i].name; i++) {
        a[i].sort = 0;
        b[i].sort = 0;
        c[i].sort = 0;
    }

    /* find the shared columns, they become the sort/compare key */
    for (int i=1; a[i].name; i++) {
        if (a[i].bound && b[i].bound) {
            inter = 1;
            a[i].sort = 1;
            b[i].sort = 1;
#ifdef DEBUG_MERGE
            printf("joining on %s\n", a[i].name);
#endif
        }
    }

    /* a and b bound variables do not intersect, return c (copy of a); its
     * values and bound flags are still exactly those of a at this point */
    if (!inter) {
#ifdef DEBUG_MERGE
        printf("remove nothing, result:\n");
        fs_binding_print(c, stdout);
#endif
        return c;
    }

    /* from here on c is rebuilt row by row */
    for (int i=0; a[i].name; i++) {
        c[i].vals->length = 0;
        if (i > 0 && (a[i].bound || b[i].bound)) {
            c[i].bound = 1;
        }
    }

    int length_a = fs_binding_length(a);
    int length_b = fs_binding_length(b);

    /* sort the two sets of bindings so they can be merged linearly */
    fs_binding_sort(a);
    fs_binding_sort(b);

#ifdef DEBUG_MERGE
    printf("a: %d bindings\n", fs_binding_length(a));
    fs_binding_print(a, stdout);
    printf("b: %d bindings\n", fs_binding_length(b));
    fs_binding_print(b, stdout);
#endif

    int apos = 0;
    int bpos = 0;
    int cmp;
    while (apos < length_a) {
        cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b);
        if (cmp == -1 || cmp == -2) {
            /* A and B aren't compatible, keep A row */
            for (int col=0; a[col].name; col++) {
                if (!c[col].need_val) {
                    continue;
                } else if (a[col].bound) {
                    fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                } else {
                    fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                }
            }
            apos++;
        } else if (cmp == 0) {
            /* Both rows are equal (cmp == 0), skip A row in result */
#if DEBUG_MERGE > 1
            printf("[I] Ar=%d, Br=%d", apos, bpos);
#endif
            /* step over the whole run of equal rows on both sides */
            int range_a = apos+1;
            int range_b = bpos+1;
            while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++;
            while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++;
            apos = range_a;
            bpos = range_b;
        } else if (cmp == +1 || cmp == +2) {
            /* A and B aren't compatible, B sorts lower, skip B or B row is NULL */
            bpos++;
        } else {
            fs_error(LOG_ERR, "cmp=%d, value out of range", cmp);
            /* neither cursor moved, so retrying would loop forever */
            break;
        }
    }

    /* clear the _ord columns */
    a[0].vals->length = 0;
    b[0].vals->length = 0;

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(c));
    fs_binding_print(c, stdout);
#endif

    return c;
}
static int update_op(struct update_context *ct) { fs_rid_vector *vec[4]; switch (ct->op->type) { case RASQAL_UPDATE_TYPE_UNKNOWN: add_message(ct, "Unknown update operation", 0); return 1; case RASQAL_UPDATE_TYPE_CLEAR: fs_clear(ct, (char *)raptor_uri_as_string(ct->op->graph_uri)); return 0; case RASQAL_UPDATE_TYPE_CREATE: return 0; case RASQAL_UPDATE_TYPE_DROP: fs_clear(ct, (char *)raptor_uri_as_string(ct->op->graph_uri)); return 0; case RASQAL_UPDATE_TYPE_LOAD: fs_load(ct, (char *)raptor_uri_as_string(ct->op->document_uri), (char *)raptor_uri_as_string(ct->op->graph_uri)); return 0; case RASQAL_UPDATE_TYPE_UPDATE: break; } fs_hash_freshen(); raptor_sequence *todel = NULL; raptor_sequence *toins = NULL; if (ct->op->where) { todel = raptor_new_sequence(NULL, NULL); toins = raptor_new_sequence(NULL, NULL); raptor_sequence *todel_p = raptor_new_sequence(NULL, NULL); raptor_sequence *toins_p = raptor_new_sequence(NULL, NULL); raptor_sequence *vars = raptor_new_sequence(NULL, NULL); fs_query *q = calloc(1, sizeof(fs_query)); ct->q = q; q->qs = ct->qs; q->rq = ct->rq; q->flags = FS_BIND_DISTINCT; q->opt_level = 3; q->soft_limit = -1; q->segments = fsp_link_segments(ct->link); q->link = ct->link; q->bb[0] = fs_binding_new(); q->bt = q->bb[0]; /* add column to denote join ordering */ fs_binding_add(q->bb[0], "_ord", FS_RID_NULL, 0); struct pattern_data pd = { .q = q, .vars = vars, .patterns = NULL, .fixed = NULL }; if (ct->op->delete_templates) { pd.patterns = todel_p; pd.fixed = todel; for (int t=0; t<raptor_sequence_size(ct->op->delete_templates); t++) { rasqal_graph_pattern *gp = raptor_sequence_get_at(ct->op->delete_templates, t); assign_gp(gp, NULL, &pd); } } if (ct->op->insert_templates) { pd.patterns = toins_p; pd.fixed = toins; for (int t=0; t<raptor_sequence_size(ct->op->insert_templates); t++) { rasqal_graph_pattern *gp = raptor_sequence_get_at(ct->op->insert_templates, t); assign_gp(gp, NULL, &pd); } } q->num_vars = raptor_sequence_size(vars); for (int i=0; i < 
q->num_vars; i++) { rasqal_variable *v = raptor_sequence_get_at(vars, i); fs_binding *b = fs_binding_get(q->bb[0], (char *)v->name); if (b) { b->need_val = 1; } else { fs_binding_add(q->bb[0], (char *)v->name, FS_RID_NULL, 1); } } fs_query_process_pattern(q, ct->op->where, vars); q->length = fs_binding_length(q->bb[0]); for (int s=0; s<4; s++) { vec[s] = fs_rid_vector_new(0); } for (int t=0; t<raptor_sequence_size(todel_p); t++) { rasqal_triple *triple = raptor_sequence_get_at(todel_p, t); for (int row=0; row < q->length; row++) { delete_rasqal_triple(ct, vec, triple, row); if (fs_rid_vector_length(vec[0]) > 0) { fsp_delete_quads_all(ct->link, vec); } } } for (int s=0; s<4; s++) { //fs_rid_vector_print(vec[s], 0, stdout); fs_rid_vector_free(vec[s]); } for (int t=0; t<raptor_sequence_size(toins_p); t++) { rasqal_triple *triple = raptor_sequence_get_at(toins_p, t); for (int row=0; row < q->length; row++) { insert_rasqal_triple(ct, triple, row); } } /* must not free the rasqal_query */ q->rq = NULL; fs_query_free(q); ct->q = NULL; } else {