/*
 * roqet_query_walk:
 * @rq: query to describe
 * @fh: stream the description is written to
 * @indent: indent value handed to roqet_write_indent() for top-level lines
 *
 * Write a human-readable description of a query to @fh: the verb, the
 * distinct / limit / offset settings, the bound variables and then - only
 * when the query has a top-level graph pattern - the construct triples,
 * the bindings variables and rows, and finally the graph pattern itself
 * via roqet_graph_pattern_walk().
 *
 * If the query has no graph pattern the function returns right after the
 * bound variables have been written.
 */
static void
roqet_query_walk(rasqal_query *rq, FILE *fh, int indent)
{
  rasqal_query_verb verb;
  int i;
  rasqal_graph_pattern* gp;
  raptor_sequence *seq;

  verb = rasqal_query_get_verb(rq);
  roqet_write_indent(fh, indent);
  fprintf(fh, "query verb: %s\n", rasqal_query_verb_as_string(verb));

  i = rasqal_query_get_distinct(rq);
  if(i != 0) {
    roqet_write_indent(fh, indent);
    fprintf(fh, "query asks for distinct results\n");
  }

  /* limit and offset are only reported when they are non-negative */
  i = rasqal_query_get_limit(rq);
  if(i >= 0) {
    roqet_write_indent(fh, indent);
    fprintf(fh, "query asks for result limits %d\n", i);
  }

  i = rasqal_query_get_offset(rq);
  if(i >= 0) {
    roqet_write_indent(fh, indent);
    fprintf(fh, "query asks for result offset %d\n", i);
  }

  seq = rasqal_query_get_bound_variable_sequence(rq);
  if(seq && raptor_sequence_size(seq) > 0) {
    /* indent this line like every other top-level line */
    roqet_write_indent(fh, indent);
    fprintf(fh, "query bound variables (%d): ", raptor_sequence_size(seq));

    i = 0;
    while(1) {
      rasqal_variable* v = (rasqal_variable*)raptor_sequence_get_at(seq, i);
      if(!v)
        break;

      if(i > 0)
        fputs(", ", fh);

      roqet_query_write_variable(fh, v);
      i++;
    }
    fputc('\n', fh);
  }

  gp = rasqal_query_get_query_graph_pattern(rq);
  if(!gp)
    return;

  seq = rasqal_query_get_construct_triples_sequence(rq);
  if(seq && raptor_sequence_size(seq) > 0) {
    roqet_write_indent(fh, indent);
    fprintf(fh, "query construct triples (%d) {\n", raptor_sequence_size(seq));

    i = 0;
    while(1) {
      rasqal_triple* t = rasqal_query_get_construct_triple(rq, i);
      if(!t)
        break;

      roqet_write_indent(fh, indent + 2);
      fprintf(fh, "triple #%d { ", i);
      rasqal_triple_print(t, fh);
      fputs(" }\n", fh);
      i++;
    }

    roqet_write_indent(fh, indent);
    fputs("}\n", fh);
  }

  /* look for binding rows */
  seq = rasqal_query_get_bindings_variables_sequence(rq);
  if(seq) {
    int rows_count;

    roqet_write_indent(fh, indent);
    fprintf(fh, "bindings variables (%d): ", raptor_sequence_size(seq));

    i = 0;
    while(1) {
      rasqal_variable* v = rasqal_query_get_bindings_variable(rq, i);
      if(!v)
        break;

      if(i > 0)
        fputs(", ", fh);

      roqet_query_write_variable(fh, v);
      i++;
    }
    fputc('\n', fh);

    /* The rows sequence is a separate lookup that the guard above did not
     * test, so do not hand a NULL sequence to raptor_sequence_size(). */
    seq = rasqal_query_get_bindings_rows_sequence(rq);
    rows_count = seq ? raptor_sequence_size(seq) : 0;

    roqet_write_indent(fh, indent);
    fprintf(fh, "bindings rows (%d) {\n", rows_count);

    i = 0;
    while(1) {
      rasqal_row* row;

      row = rasqal_query_get_bindings_row(rq, i);
      if(!row)
        break;

      roqet_write_indent(fh, indent + 2);
      fprintf(fh, "row #%d { ", i);
      rasqal_row_print(row, fh);
      fputs("}\n", fh);
      i++;
    }

    /* close the "bindings rows (...) {" section opened above */
    roqet_write_indent(fh, indent);
    fputs("}\n", fh);
  }

  fputs("query ", fh);
  roqet_graph_pattern_walk(gp, -1, fh, indent);
}
/**
 * Estimate the cost of binding triple pattern t within the given block.
 * Lower values mean "cheaper"; fs_optimise_triple_pattern() compares the
 * values of two patterns to decide which to run first.
 *
 * The clauses are tried in order and the first match wins:
 *  - no constant in subject, predicate, object or origin: INT_MAX
 *  - neither subject nor object constant: INT_MAX - 100
 *  - frequency data available (qs->freq_s / qs->freq_o) and exactly one
 *    value for the relevant slots: the value returned by calc_freq();
 *    object-keyed estimates get q->segments * 50 added on top
 *  - no usable frequency data: product of the value counts of the bounded
 *    slots, scaled up when none of subject/predicate/object is bound
 *  - otherwise: the default of 100
 *
 * \param qs     query state holding the optional freq_s / freq_o tables
 * \param q      query whose binding table q->bb[block] is consulted
 * \param block  index into q->bb
 * \param t      triple pattern to cost
 * \return the estimate described above
 */
int fs_bind_freq(fs_query_state *qs, fs_query *q, int block, rasqal_triple *t)
{
    int ret = 100;

#if DEBUG_OPTIMISER
    char dir = 'X';
#endif

    if (!fs_opt_is_const(q->bb[block], t->subject) &&
        !fs_opt_is_const(q->bb[block], t->predicate) &&
        !fs_opt_is_const(q->bb[block], t->object) &&
        !fs_opt_is_const(q->bb[block], t->origin)) {
#if DEBUG_OPTIMISER
        dir = '?';
#endif
        ret = INT_MAX;
    } else if (!fs_opt_is_const(q->bb[block], t->subject) &&
               !fs_opt_is_const(q->bb[block], t->object)) {
#if DEBUG_OPTIMISER
        dir = '?';
#endif
        ret = INT_MAX - 100;
    } else if (qs->freq_s &&
               fs_opt_num_vals(q->bb[block], t->subject) == 1 &&
               fs_opt_num_vals(q->bb[block], t->predicate) == 1) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = calc_freq(q, block, qs->freq_s, t->subject, t->predicate);
    } else if (qs->freq_o &&
               fs_opt_num_vals(q->bb[block], t->object) == 1 &&
               fs_opt_num_vals(q->bb[block], t->predicate) == 1) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        ret = calc_freq(q, block, qs->freq_o, t->object, t->predicate) +
              q->segments * 50;
    } else if (qs->freq_s &&
               fs_opt_num_vals(q->bb[block], t->subject) == 1) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = calc_freq(q, block, qs->freq_s, t->subject, NULL);
    } else if (qs->freq_o &&
               fs_opt_num_vals(q->bb[block], t->object) == 1) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        /* Look the object up in the object table. This branch is guarded
         * by freq_o, so freq_s may well be NULL here; passing freq_s (as
         * the code used to) consulted the wrong table. */
        ret = calc_freq(q, block, qs->freq_o, t->object, NULL) +
              q->segments * 50;
    /* clauses for if we have no freq data */
    } else if (fs_opt_num_vals(q->bb[block], t->subject) < 1000000 &&
               fs_opt_num_vals(q->bb[block], t->predicate) < 100 &&
               fs_opt_num_vals(q->bb[block], t->object) == INT_MAX) {
#if DEBUG_OPTIMISER
        dir = 's';
#endif
        ret = fs_opt_num_vals(q->bb[block], t->subject) *
              fs_opt_num_vals(q->bb[block], t->predicate);
        if (!fs_opt_is_bound(q->bb[block], t->subject) &&
            !fs_opt_is_bound(q->bb[block], t->predicate) &&
            !fs_opt_is_bound(q->bb[block], t->object)) {
            /* NOTE(review): this int multiplication can overflow if
             * fs_binding_length() is large - confirm its range. */
            ret *= (fs_binding_length(q->bb[block]) * 100);
        }
    } else if (fs_opt_num_vals(q->bb[block], t->object) < 1000000 &&
               fs_opt_num_vals(q->bb[block], t->predicate) < 100 &&
               fs_opt_num_vals(q->bb[block], t->subject) == INT_MAX) {
#if DEBUG_OPTIMISER
        dir = 'o';
#endif
        ret = fs_opt_num_vals(q->bb[block], t->predicate) *
              fs_opt_num_vals(q->bb[block], t->object);
        if (!fs_opt_is_bound(q->bb[block], t->subject) &&
            !fs_opt_is_bound(q->bb[block], t->predicate) &&
            !fs_opt_is_bound(q->bb[block], t->object)) {
            /* NOTE(review): same potential int overflow as above. */
            ret *= (fs_binding_length(q->bb[block]) * 100);
        }
    }

#if DEBUG_OPTIMISER
    if (q->flags & FS_QUERY_EXPLAIN) {
        printf("freq(%c, ", dir);
        rasqal_triple_print(t, stdout);
        printf(") = %d\n", ret);
    }
#endif

    return ret;
}
/*
 * roqet_graph_pattern_walk:
 * @gp: graph pattern to describe
 * @gp_index: position of @gp within its parent, or a negative value to
 *   omit the " #N" suffix from the header line
 * @fh: stream the description is written to
 * @indent: indent value handed to roqet_write_indent() for the header line;
 *   the contents are written at @indent + 2
 *
 * Write a human-readable description of a graph pattern to @fh: a header
 * with the operator name, then (each only when present) the LET variable
 * and its expression, the GRAPH origin literal, the SERVICE literal, the
 * triples, the sub-graph patterns (by recursing into this function) and
 * the filter expression, followed by a closing "}".
 */
static void
roqet_graph_pattern_walk(rasqal_graph_pattern *gp, int gp_index,
                         FILE *fh, int indent)
{
  int triple_index = 0;
  rasqal_graph_pattern_operator op;
  int seen;
  raptor_sequence *seq;
  int idx;
  rasqal_expression* expr;
  rasqal_variable* var;
  rasqal_literal* literal;

  op = rasqal_graph_pattern_get_operator(gp);

  roqet_write_indent(fh, indent);
  fprintf(fh, "%s graph pattern", rasqal_graph_pattern_operator_as_string(op));
  idx = rasqal_graph_pattern_get_index(gp);
  if(idx >= 0)
    fprintf(fh, "[%d]", idx);
  if(gp_index >= 0)
    fprintf(fh, " #%d", gp_index);
  fputs(" {\n", fh);

  indent += 2;

  /* look for LET variable and value */
  var = rasqal_graph_pattern_get_variable(gp);
  if(var) {
    roqet_write_indent(fh, indent);
    fprintf(fh, "%s := ", var->name);
    rasqal_expression_print(var->expression, fh);
    /* Terminate the line: the expression printer does not do it (the
     * filter branch below adds its own line ending too), so without this
     * the next item would be glued onto the LET line. */
    fputc('\n', fh);
  }

  /* look for GRAPH literal */
  literal = rasqal_graph_pattern_get_origin(gp);
  if(literal) {
    roqet_write_indent(fh, indent);
    fputs("origin ", fh);
    rasqal_literal_print(literal, fh);
    fputc('\n', fh);
  }

  /* look for SERVICE literal */
  literal = rasqal_graph_pattern_get_service(gp);
  if(literal) {
    roqet_write_indent(fh, indent);
    rasqal_literal_print(literal, fh);
    fputc('\n', fh);
  }

  /* look for triples; the "triples {" header is only written once the
   * first triple has been found */
  seen = 0;
  while(1) {
    rasqal_triple* t;

    t = rasqal_graph_pattern_get_triple(gp, triple_index);
    if(!t)
      break;

    if(!seen) {
      roqet_write_indent(fh, indent);
      fputs("triples {\n", fh);
      seen = 1;
    }

    roqet_write_indent(fh, indent + 2);
    fprintf(fh, "triple #%d { ", triple_index);
    rasqal_triple_print(t, fh);
    fputs(" }\n", fh);

    triple_index++;
  }
  if(seen) {
    roqet_write_indent(fh, indent);
    fputs("}\n", fh);
  }

  /* look for sub-graph patterns */
  seq = rasqal_graph_pattern_get_sub_graph_pattern_sequence(gp);
  if(seq && raptor_sequence_size(seq) > 0) {
    roqet_write_indent(fh, indent);
    fprintf(fh, "sub-graph patterns (%d) {\n", raptor_sequence_size(seq));

    /* gp_index is reused as the child counter from here on */
    gp_index = 0;
    while(1) {
      rasqal_graph_pattern* sgp;

      sgp = rasqal_graph_pattern_get_sub_graph_pattern(gp, gp_index);
      if(!sgp)
        break;

      roqet_graph_pattern_walk(sgp, gp_index, fh, indent + 2);
      gp_index++;
    }

    roqet_write_indent(fh, indent);
    fputs("}\n", fh);
  }

  /* look for filter */
  expr = rasqal_graph_pattern_get_filter_expression(gp);
  if(expr) {
    roqet_write_indent(fh, indent);
    fputs("filter { ", fh);
    rasqal_expression_print(expr, fh);
    fputs("}\n", fh);
  }

  indent -= 2;

  roqet_write_indent(fh, indent);
  fputs("}\n", fh);
}
/**
 * Reorder the triple patterns patt[start .. length-1] in place so that the
 * ones the heuristics below consider most selective come first, and report
 * how many patterns at patt[start] can be handled together.
 *
 * Entries before start are left untouched. Nothing is done (and 1 is
 * returned) when fewer than two patterns remain from start, when
 * q->opt_level is below 1, when start is negative, when the scratch buffer
 * cannot be allocated, or when the reordering lost track of a pattern.
 *
 * \param qs      query state, passed through to fs_bind_freq()
 * \param q       query; q->bb[block] and q->opt_level are consulted
 * \param block   index into q->bb
 * \param patt    array of length pattern pointers, reordered in place
 * \param length  number of entries in patt
 * \param start   index of the first pattern that may be reordered
 * \return the number of consecutive patterns starting at patt[start] that
 *         share a subject variable and have non-variable predicate and
 *         object (when more than one was found), otherwise 1
 */
int fs_optimise_triple_pattern(fs_query_state *qs, fs_query *q, int block, rasqal_triple *patt[], int length, int start)
{
    if (start < 0 || length - start < 2 || q->opt_level < 1) {
        return 1;
    }

    rasqal_triple **pbuf = malloc((size_t)length * sizeof *pbuf);
    if (!pbuf) {
        fs_error(LOG_CRIT, "Optimiser could not allocate pattern buffer");

        return 1;
    }
    memcpy(pbuf, patt, (size_t)length * sizeof *pbuf);

    /* Patterns before start stay where they are in patt and are hidden
     * from the passes below. Previously the whole of patt was zeroed and
     * the prefix was then "restored" from the zeroed array, which wiped
     * the caller's first start entries. */
    for (int i=0; i<start; i++) {
        pbuf[i] = NULL;
    }
    memset(patt + start, 0, (size_t)(length - start) * sizeof *patt);
    int append_pos = start;

    /* roughly sort into order:
     *   const subject and predicate
     *   const predicate and object
     *   const subject
     *   const object
     *   const graph
     *   const predicate
     *   all variable
     *
     * Each pass moves the still-unplaced patterns that match its test from
     * pbuf to the next free slot of patt, so earlier passes take priority.
     */

#if 0
    /* this code complicates things greatly, so I've removed it for now - swh
       should maybe be reexamined if/when we get histograms back */
    for (int i=start; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_bind_freq(qs, q, block, pbuf[i]) == 1) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }
#endif

    for (int i=start; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_const(q->bb[block], pbuf[i]->subject) &&
            fs_opt_is_const(q->bb[block], pbuf[i]->predicate) &&
            fs_opt_is_bound(q->bb[block], pbuf[i]->object)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* triples like :s :p ?o, where :p != rdf:type */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_const(q->bb[block], pbuf[i]->subject) &&
            fs_opt_is_const(q->bb[block], pbuf[i]->predicate) &&
            fs_opt_is_bound(q->bb[block], pbuf[i]->object) &&
            !fs_opt_literal_is_rdf_type(pbuf[i]->predicate)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* triples like :s :p _, where :p != rdf:type */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_const(q->bb[block], pbuf[i]->subject) &&
            fs_opt_is_const(q->bb[block], pbuf[i]->predicate) &&
            !fs_opt_literal_is_rdf_type(pbuf[i]->predicate)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* triples like ?s :p :o, where :p != rdf:type */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_bound(q->bb[block], pbuf[i]->subject) &&
            fs_opt_is_const(q->bb[block], pbuf[i]->predicate) &&
            fs_opt_is_const(q->bb[block], pbuf[i]->object) &&
            !fs_opt_literal_is_rdf_type(pbuf[i]->predicate)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* triples like ?s rdf:type :o */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_bound(q->bb[block], pbuf[i]->subject) &&
            fs_opt_is_const(q->bb[block], pbuf[i]->predicate) &&
            fs_opt_is_const(q->bb[block], pbuf[i]->object)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* const subject with a bound object */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_const(q->bb[block], pbuf[i]->subject) &&
            fs_opt_is_bound(q->bb[block], pbuf[i]->object)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* const object with a bound subject */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_const(q->bb[block], pbuf[i]->object) &&
            fs_opt_is_bound(q->bb[block], pbuf[i]->subject)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* const predicate with a bound subject or object */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_const(q->bb[block], pbuf[i]->predicate) &&
            (fs_opt_is_bound(q->bb[block], pbuf[i]->subject) ||
             fs_opt_is_bound(q->bb[block], pbuf[i]->object))) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* const graph */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        if (fs_opt_is_const(q->bb[block], pbuf[i]->origin)) {
            patt[append_pos++] = pbuf[i];
            pbuf[i] = NULL;
        }
    }

    /* everything that is left, in its original relative order */
    for (int i=0; i<length; i++) {
        if (!pbuf[i]) {
            continue;
        }
        patt[append_pos++] = pbuf[i];
        pbuf[i] = NULL;
    }

    free(pbuf);

    if (append_pos != length) {
        fs_error(LOG_CRIT, "Optimser mismatch error");

        /* patt now has unfilled (NULL) slots at its end, so the checks
         * below, which dereference patt[start] and patt[start+1], are not
         * safe to run */
        return 1;
    }

#if DEBUG_OPTIMISER
    printf("optimiser choices look like:\n");
    for (int i=start; i<length; i++) {
        printf("%4d: ", i);
        rasqal_triple_print(patt[i], stdout);
        printf("\n");
    }
#endif

    /* If the next two or more pattern's subjects are both variables, we might be able
     * to multi reverse bind them */
    if (var_name(patt[start]->subject) && var_name(patt[start+1]->subject) &&
        !var_name(patt[start]->predicate) && !var_name(patt[start]->object) &&
        fs_opt_num_vals(q->bb[block], patt[start]->predicate) == 1 &&
        fs_opt_num_vals(q->bb[block], patt[start]->origin) == 0 &&
        fs_opt_num_vals(q->bb[block], patt[start+1]->origin) == 0) {
        char *svname = var_name(patt[start]->subject);
        int count = 1;
        while (start+count < length &&
               !fs_opt_is_const(q->bb[block], patt[start+count]->subject) &&
               !strcmp(svname, var_name(patt[start+count]->subject)) &&
               !var_name(patt[start+count]->object) &&
               !var_name(patt[start+count]->predicate)) {
            count++;
        }

        /* if we found a reverse bind pair then we may as well use that, rather
         * than pressing on and using the freq data to pick an order, the
         * backend has more complete information */
        if (count > 1) return count;
    }

    /* At least two patterns remain from start (checked on entry), so the
     * first two can always be compared. */
    int freq_a = fs_bind_freq(qs, q, block, patt[start]);
    int freq_b = fs_bind_freq(qs, q, block, patt[start+1]);
    /* the 2nd is cheaper than the 1st, then swap them */
    if (freq_b < freq_a) {
        rasqal_triple *tmp = patt[start];
        patt[start] = patt[start+1];
        patt[start+1] = tmp;
    }

    return 1;
}