/*
 * Equality test between two expression values.
 *
 * An argument carrying the type-error flag is returned unchanged (a is
 * checked before b).  Otherwise the checks run in this order:
 *   1. fn_rdfterm_equal(); its result is returned when its `in` field is 1,
 *   2. fn_datetime_equal() when a is typed xsd:dateTime,
 *   3. fn_numeric_equal() when both values are numeric,
 *   4. a strcmp() of the lexical forms when both RIDs are literals and both
 *      attrs are either fs_c.empty or fs_c.xsd_string.
 * Anything else yields boolean false.
 */
fs_value fn_equal(fs_query *q, fs_value a, fs_value b)
{
#if 0
    fs_value_print(a);
    printf(" = ");
    fs_value_print(b);
    printf("\n");
#endif

    /* errors propagate untouched, left operand first */
    if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return a;
    }
    if (b.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return b;
    }

    /* only a positive term-equality answer is final; otherwise keep looking */
    fs_value same_term = fn_rdfterm_equal(q, a, b);
    if (same_term.in == 1) {
        return same_term;
    }

    if (a.attr == fs_c.xsd_datetime) {
        return fn_datetime_equal(q, a, b);
    }

    if (fs_is_numeric(&a) && fs_is_numeric(&b)) {
        return fn_numeric_equal(q, a, b);
    }

    /* string comparison applies only to literal RIDs ... */
    if (!FS_IS_LITERAL(a.rid) || !FS_IS_LITERAL(b.rid)) {
        return fs_value_boolean(0);
    }

    /* ... whose attr is fs_c.empty or xsd:string on both sides */
    const int a_stringy = (a.attr == fs_c.empty || a.attr == fs_c.xsd_string);
    const int b_stringy = (b.attr == fs_c.empty || b.attr == fs_c.xsd_string);
    if (!a_stringy || !b_stringy) {
        return fs_value_boolean(0);
    }

    return fs_value_boolean(strcmp(a.lex, b.lex) == 0);
}
/*
 * Append to v every entry of v2 that is neither FS_RID_NULL nor a literal
 * RID.  A NULL v2 is treated as empty.
 */
void fs_rid_vector_append_vector_no_nulls_lit(fs_rid_vector *v, fs_rid_vector *v2)
{
    if (v2 == NULL) {
        return;
    }

    for (int idx = 0; idx < v2->length; idx++) {
        /* skip the entries the function name promises to drop */
        if (v2->data[idx] == FS_RID_NULL || FS_IS_LITERAL(v2->data[idx])) {
            continue;
        }
        fs_rid_vector_append(v, v2->data[idx]);
    }
}
/*
 * Boolean test for "is this value a literal?".
 *
 * A value flagged as a type error is handed back unchanged.  When the value
 * has a valid RID the answer comes from FS_IS_LITERAL(rid); without one, the
 * value counts as a literal whenever its attr is not FS_RID_NULL.
 */
fs_value fn_is_literal(fs_query *q, fs_value a)
{
    if ((a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) != 0) {
        return a;
    }

    const int has_rid = (a.valid & fs_valid_bit(FS_V_RID)) != 0;
    if (!has_rid) {
        /* no RID to inspect: fall back to the attr */
        return fs_value_boolean(a.attr != FS_RID_NULL);
    }

    return fs_value_boolean(FS_IS_LITERAL(a.rid));
}
/*
 * Argument compatibility check, following
 * http://www.w3.org/TR/sparql11-query/#func-arg-compatibility
 *
 * Returns 1 when arg1 and arg2 are compatible, 0 otherwise.
 */
int fs_arg_compatible(fs_value arg1, fs_value arg2)
{
    /* Rule 1: both are simple literals or literals typed as xsd:string */
    if (fs_is_plain_or_string(arg1) && fs_is_plain_or_string(arg2)) {
        return 1;
    }

    /* The remaining rules both need arg1's attr to be a literal RID, which
     * is how a language tag is represented here */
    if (!FS_IS_LITERAL(arg1.attr)) {
        return 0;
    }

    /* Rule 2: identical language tags, or
     * Rule 3: language-tagged first argument with a simple / xsd:string
     *         second argument */
    return arg1.attr == arg2.attr || fs_is_plain_or_string(arg2);
}
/*
 * Three-way string comparison of two values.
 *
 * Type-error arguments are returned unchanged (a before b).  The comparison
 * is performed when both lexical forms are present and the attrs fall into
 * one of these pairings:
 *   - both attrs are literal RIDs,
 *   - both attrs are fs_c.empty,
 *   - both attrs are fs_c.xsd_string.
 * The result is the integer -1, 0 or 1 following strcmp() ordering.  Every
 * other combination produces an FS_ERROR_INVALID_TYPE error value.
 */
fs_value fn_compare(fs_query *q, fs_value a, fs_value b)
{
    if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return a;
    }
    if (b.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return b;
    }

#if 0
    fs_value_print(a);
    printf(" <=> ");
    fs_value_print(b);
    printf("\n");
#endif

    const int both_literal_attr = FS_IS_LITERAL(a.attr) && FS_IS_LITERAL(b.attr);
    const int both_plain = (a.attr == fs_c.empty && b.attr == fs_c.empty);
    const int both_xsd_string = (a.attr == fs_c.xsd_string && b.attr == fs_c.xsd_string);

    if ((both_literal_attr || both_plain || both_xsd_string) && a.lex && b.lex) {
        const int diff = strcmp(a.lex, b.lex);

        /* collapse strcmp's result to its sign: -1, 0 or 1 */
        return fs_value_integer((diff > 0) - (diff < 0));
    }

    return fs_value_error(FS_ERROR_INVALID_TYPE, "bad arguments to fn:compare");
}
/*
 * Return the datatype of a literal value as a URI value.
 *
 * Results, in the order they are checked:
 *   - a type-error argument is returned unchanged;
 *   - a value with a valid RID that is a URI or bNode gives an
 *     FS_ERROR_INVALID_TYPE error;
 *   - an attr of FS_RID_NULL gives the same error;
 *   - an attr that is a literal RID other than fs_c.empty gives the same
 *     error;
 *   - the well-known attrs (empty / xsd:string, double, float, decimal,
 *     integer, boolean, dateTime) map straight to their XSD_* URI constants;
 *   - any other attr is resolved through the query's link and the resulting
 *     lexical form is returned as a URI;
 *   - when the query has no link, the URI "error:unresloved" is returned.
 *
 * The one-element RID vector used for resolution is only allocated once a
 * link is known to exist, so no path returns without freeing it.
 */
fs_value fn_datatype(fs_query *q, fs_value a)
{
#if 0
    printf("datatype(");
    fs_value_print(a);
    printf(")\n");
#endif

    if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return a;
    }
    if (a.valid & fs_valid_bit(FS_V_RID) && FS_IS_URI_BN(a.rid)) {
        return fs_value_error(FS_ERROR_INVALID_TYPE, NULL);
    }
    if (a.attr == FS_RID_NULL) {
        return fs_value_error(FS_ERROR_INVALID_TYPE, NULL);
    }
    if (FS_IS_LITERAL(a.attr) && a.attr != fs_c.empty) {
        return fs_value_error(FS_ERROR_INVALID_TYPE, NULL);
    }

    /* datatypes with a fixed URI need no lookup */
    if (a.attr == fs_c.xsd_string || a.attr == fs_c.empty) {
        return fs_value_uri(XSD_STRING);
    } else if (a.attr == fs_c.xsd_double) {
        return fs_value_uri(XSD_DOUBLE);
    } else if (a.attr == fs_c.xsd_float) {
        return fs_value_uri(XSD_FLOAT);
    } else if (a.attr == fs_c.xsd_decimal) {
        return fs_value_uri(XSD_DECIMAL);
    } else if (a.attr == fs_c.xsd_integer) {
        return fs_value_uri(XSD_INTEGER);
    } else if (a.attr == fs_c.xsd_boolean) {
        return fs_value_uri(XSD_BOOLEAN);
    } else if (a.attr == fs_c.xsd_datetime) {
        return fs_value_uri(XSD_DATETIME);
    }

    /* without a link the attr cannot be resolved; bail out before
     * allocating anything */
    if (!fs_query_link(q)) {
        return fs_value_uri("error:unresloved");
    }

    fs_rid_vector *r = fs_rid_vector_new(1);
    r->data[0] = a.attr;

    fs_resource res;
    fsp_resolve(fs_query_link(q),
                FS_RID_SEGMENT(a.attr, fsp_link_segments(fs_query_link(q))),
                r, &res);
    fs_rid_vector_free(r);

    return fs_value_uri(res.lex);
}
/*
 * Import one rasqal triple (plus its graph) as a quad.
 *
 * The graph is taken from, in order of preference: the triple's own origin,
 * the operation's graph URI, or the default graph.  Returns 1 without
 * importing anything when the graph is not a URI, the subject is a literal,
 * or the predicate is not a URI; returns 0 after the quad and its resources
 * have been handed to the import calls.
 *
 * Writing to the fs_c.system_config graph triggers fsp_reload_acl_system()
 * before any validation takes place.
 */
static int insert_rasqal_triple(struct update_context *uc, rasqal_triple *triple, int row)
{
    fs_rid quad_buf[1][4];
    fs_rid *quad = quad_buf[0];     /* [0] graph, [1] subject, [2] predicate, [3] object */
    fs_resource res;                /* reused for each of the four resources in turn */

    /* --- work out the graph and its resource description --- */
    if (triple->origin) {
        fs_resource_from_rasqal_literal(uc, triple->origin, &res, row);
        quad[0] = fs_hash_rasqal_literal(uc, triple->origin, row);
    } else if (uc->op->graph_uri) {
        res.lex = (char *)raptor_uri_as_string(uc->op->graph_uri);
        res.attr = FS_RID_NULL;
        quad[0] = fs_hash_uri((char *)raptor_uri_as_string(uc->op->graph_uri));
    } else {
        quad[0] = fs_c.default_graph;
        res.lex = FS_DEFAULT_GRAPH;
        res.attr = FS_RID_NULL;
    }

    if (quad[0] == fs_c.system_config) {
        fsp_reload_acl_system(uc->link);
    }

    /* --- hash and validate the terms, rejecting as early as possible --- */
    if (!FS_IS_URI(quad[0])) {
        return 1;
    }

    quad[1] = fs_hash_rasqal_literal(uc, triple->subject, row);
    if (FS_IS_LITERAL(quad[1])) {
        return 1;
    }

    quad[2] = fs_hash_rasqal_literal(uc, triple->predicate, row);
    if (!FS_IS_URI(quad[2])) {
        return 1;
    }

    quad[3] = fs_hash_rasqal_literal(uc, triple->object, row);

    /* --- import the four resources; one is skipped if it has no lexical form --- */
    res.rid = quad[0];
    if (res.lex) {
        fsp_res_import(uc->link, FS_RID_SEGMENT(quad[0], uc->segments), 1, &res);
    }

    res.rid = quad[1];
    fs_resource_from_rasqal_literal(uc, triple->subject, &res, 0);
    if (res.lex) {
        fsp_res_import(uc->link, FS_RID_SEGMENT(quad[1], uc->segments), 1, &res);
    }

    res.rid = quad[2];
    fs_resource_from_rasqal_literal(uc, triple->predicate, &res, 0);
    if (res.lex) {
        fsp_res_import(uc->link, FS_RID_SEGMENT(quad[2], uc->segments), 1, &res);
    }

    res.rid = quad[3];
    fs_resource_from_rasqal_literal(uc, triple->object, &res, 0);
    if (res.lex) {
        fsp_res_import(uc->link, FS_RID_SEGMENT(quad[3], uc->segments), 1, &res);
    }

    /* --- finally the quad itself, sent to the subject's segment --- */
    fsp_quad_import(uc->link, FS_RID_SEGMENT(quad[1], uc->segments),
                    FS_BIND_BY_SUBJECT, 1, quad_buf);

    return 0;
}
/*
 * Cast value v to the datatype named by d.
 *
 * Two combinations are accepted:
 *   - d is a URI and v is a literal: delegated to fn_cast_intl();
 *   - d is xsd:string and v is a URI: fn_cast_intl() is used and the result's
 *     RID is replaced by the literal hash of v's lexical form with datatype d.
 * Everything else yields an FS_ERROR_INVALID_TYPE error value.
 */
fs_value fn_cast(fs_query *q, fs_value v, fs_value d)
{
#if 0
    printf("CAST ");
    fs_value_print(v);
    printf(" -> ");
    fs_value_print(d);
    printf("\n");
#endif

    if (FS_IS_URI(d.rid) && FS_IS_LITERAL(v.rid)) {
        return fn_cast_intl(q, v, d.rid);
    }

    if (d.rid != fs_c.xsd_string || !FS_IS_URI(v.rid)) {
        return fs_value_error(FS_ERROR_INVALID_TYPE, "cast on URI/bNode");
    }

    /* URI -> xsd:string: give the resulting literal its own RID */
    fs_value as_string = fn_cast_intl(q, v, d.rid);
    as_string.rid = fs_hash_literal(v.lex, d.rid);

    return as_string;
}
/*
 * Inequality test between two expression values.
 *
 * Type-error arguments are returned unchanged (a before b).  Otherwise, in
 * order:
 *   - a typed xsd:dateTime: negation of fn_datetime_equal();
 *   - both numeric: negation of fn_numeric_equal();
 *   - both attrs are fs_c.empty or xsd:string: true when the lexical forms
 *     differ;
 *   - one side is a URI/bNode and the other a literal: true;
 *   - exactly one side carries a language tag: true;
 *   - either attr is a URI (an unknown datatype at this point): false;
 *   - anything else: negation of fn_rdfterm_equal().
 */
fs_value fn_not_equal(fs_query *q, fs_value a, fs_value b)
{
#if 0
    fs_value_print(a);
    printf(" != ");
    fs_value_print(b);
    printf("\n");
#endif

    if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return a;
    }
    if (b.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return b;
    }

    if (a.attr == fs_c.xsd_datetime) {
        return fn_not(q, fn_datetime_equal(q, a, b));
    }
    if (fs_is_numeric(&a) && fs_is_numeric(&b)) {
        return fn_not(q, fn_numeric_equal(q, a, b));
    }

    if ((a.attr == fs_c.empty || a.attr == fs_c.xsd_string) &&
        (b.attr == fs_c.empty || b.attr == fs_c.xsd_string)) {
        /* strcmp() only guarantees the sign of its result, so reduce it to
         * a canonical 0/1 before building the boolean (fn_equal does the
         * same with !strcmp) */
        return fs_value_boolean(strcmp(a.lex, b.lex) != 0);
    }

    if ((FS_IS_URI_BN(a.rid) && FS_IS_LITERAL(b.rid)) ||
        (FS_IS_LITERAL(a.rid) && FS_IS_URI_BN(b.rid))) {
        /* one is a URI/bNode and one is a literal, definitely different */
        return fs_value_boolean(1);
    }

    if ((!FS_IS_URI(a.rid) && a.attr != fs_c.empty &&
         FS_IS_LITERAL(a.attr) && !FS_IS_LITERAL(b.attr)) ||
        (!FS_IS_URI(a.rid) && !FS_IS_LITERAL(a.attr) &&
         b.attr != fs_c.empty && FS_IS_LITERAL(b.attr))) {
        /* one has a lang tag and one doesn't, definitely different */
        return fs_value_boolean(1);
    }

    if (FS_IS_URI(a.attr) || FS_IS_URI(b.attr)) {
        /* at least one argument has an unknown datatype */
        return fs_value_boolean(0);
    }

    return fn_not(q, fn_rdfterm_equal(q, a, b));
}
/*
 * Print a RID vector to `out`, one entry per line, with each RID resolved to
 * its lexical form through the backend.
 *
 * bNodes are printed bare, URIs inside <>, literals inside double quotes and
 * anything else inside question marks.  A NULL vector prints a single
 * "(null)" line.  The `flags` argument is not used.
 */
void fs_rid_vector_print_resolved(fs_backend *be, fs_rid_vector *v, int flags, FILE *out)
{
    if (v == NULL) {
        fprintf(out, "RID vector: (null)\n");
        return;
    }

    fprintf(out, "RID vector (%d items)\n", v->length);

    int pos = 0;
    while (pos < v->length) {
        fs_resource res;

        /* the low three bits of the RID are passed as the second argument */
        fs_resolve_rid(be, v->data[pos] & 0x7, v->data[pos], &res);

        if (FS_IS_BNODE(v->data[pos])) {
            fprintf(out, "%4d %llx %s\n", pos, v->data[pos], res.lex);
        } else if (FS_IS_URI(v->data[pos])) {
            fprintf(out, "%4d %llx <%s>\n", pos, v->data[pos], res.lex);
        } else if (FS_IS_LITERAL(v->data[pos])) {
            fprintf(out, "%4d %llx \"%s\"\n", pos, v->data[pos], res.lex);
        } else {
            fprintf(out, "%4d %llx ?%s?\n", pos, v->data[pos], res.lex);
        }

        pos++;
    }
}
xmlChar *get_uri(fsp_link *link, fs_rid rid) { if (cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); return (xmlChar *) resource.lex; } xmlChar *get_attr(fsp_link *link, fs_rid rid) { if (attr_cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) attr_cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); memcpy(&attr_cache[rid & ATTR_CACHE_MASK], &resource, sizeof(fs_resource)); return (xmlChar *) resource.lex; } xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr) { if (cache[rid & CACHE_MASK].rid == rid) { *attr = cache[rid & CACHE_MASK].attr; return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); *attr = resource.attr; return (xmlChar *) resource.lex; } void resolve_triples(fsp_link *link, fs_rid_vector **rids) { int quads = rids[0]->length; fs_rid_vector *todo[segments]; fs_segment segment; for (segment = 0; segment < segments; ++segment) { todo[segment] = fs_rid_vector_new(0); } for (int c = 0; c < 3; ++c) { for (int k = 0; k < quads; ++k) { const fs_rid rid = rids[c]->data[k]; if (FS_IS_BNODE(rid) || cache[rid & CACHE_MASK].rid == rid) continue; fs_rid_vector_append(todo[FS_RID_SEGMENT(rid, segments)], rid); cache[rid & CACHE_MASK].rid = rid; /* well, it will be soon */ } } int length[segments]; fs_resource *resources[segments]; for (segment = 0; segment < segments; ++segment) { length[segment] = todo[segment]->length; resources[segment] = calloc(length[segment], sizeof(fs_resource)); } fsp_resolve_all(link, todo, resources); for (segment = 0; segment < segments; ++segment) 
{ fs_resource *res = resources[segment]; for (int k = 0; k < length[segment]; ++k) { free(cache[res[k].rid & CACHE_MASK].lex); memcpy(&cache[res[k].rid & CACHE_MASK], &res[k], sizeof(fs_resource)); } fs_rid_vector_free(todo[segment]); free(resources[segment]); } } void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml) { fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fs_rid_vector one = { .length = 1, .size = 1, .data = &model }; fs_rid_vector **results; double then; /* for time keeping */ then = fs_time(); fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT); time_bind_first += (fs_time() - then); while (results != NULL) { long length = results[0]->length; if (length == 0) break; then = fs_time(); resolve_triples(link, results); time_resolving += (fs_time() - then); then = fs_time(); for (int k = 0; k < length; ++k) { xmlTextWriterStartElement(xml, (xmlChar *) "triple"); for (int r = 0; r < 3; ++r) { fs_rid rid = results[r]->data[k]; if (FS_IS_BNODE(rid)) { unsigned long long node = FS_BNODE_NUM(rid); xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node); } else if (FS_IS_URI(rid)) { xmlChar *uri = get_uri(link, rid); xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri); } else if (FS_IS_LITERAL(rid)) { fs_rid attr; xmlChar *lex = get_literal(link, rid, &attr); if (attr == fs_c.empty) { xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex); } else if (FS_IS_URI(attr)) { xmlChar *type = get_uri(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral"); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type); xmlTextWriterEndElement(xml); } else if (FS_IS_LITERAL(attr)) { xmlChar *lang = get_attr(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral"); xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterEndElement(xml); } } } 
xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); } time_write_out += (fs_time() - then); fs_rid_vector_free(results[0]); fs_rid_vector_free(results[1]); fs_rid_vector_free(results[2]); free(results); then = fs_time(); fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT); time_bind_next += (fs_time() - then); } fsp_bind_done_all(link); } void dump_trix(fsp_link *link, xmlTextWriterPtr xml) { fs_rid_vector **models; fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models); fs_rid_vector_sort(models[0]); fs_rid_vector_uniq(models[0], 1); long length = models[0]->length; for (int k = 0; k < length; ++k) { fs_rid model = models[0]->data[k]; xmlChar *model_uri = get_uri(link, model); xmlTextWriterStartElement(xml, (xmlChar *) "graph"); if (FS_IS_URI(model)) { xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri); } else { fs_error(LOG_WARNING, "model %lld is not a URI", model); } dump_model(link, model, xml); xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out); } } void dump_file(fsp_link *link, char *filename) { xmlTextWriterPtr xml = xmlNewTextWriterFilename(filename, TRUE); if (!xml) { fs_error(LOG_ERR, "Couldn't write output file, giving up"); exit(4); } xmlTextWriterStartDocument(xml, NULL, NULL, NULL); xmlTextWriterStartElement(xml, (xmlChar *) "TriX"); dump_trix(link, xml); xmlTextWriterEndDocument(xml); /* also closes TriX */ xmlFreeTextWriter(xml); } int main(int argc, char *argv[]) { char *password = fsp_argv_password(&argc, argv); if (argc != 3) { fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]); exit(1); } fsp_link *link = fsp_open_link(argv[1], password, FS_OPEN_HINT_RO); if (!link) { 
fs_error (LOG_ERR, "couldn't connect to “%s”", argv[1]); exit(2); } fs_hash_init(fsp_hash_type(link)); segments = fsp_link_segments(link); dump_file(link, argv[2]); fsp_close_link(link); }