/**
 * Coerce a value to a URI value.
 *
 * - Error values are returned unchanged.
 * - A value whose RID is valid and is a blank node gets a synthetic
 *   "bnode:b<hex>" lexical form; the string is passed to
 *   fs_query_add_freeable() (presumably so the query releases it later —
 *   confirm against that helper) and its hash becomes the RID.
 * - Anything else is converted from its lexical form, which is filled in
 *   via fs_value_fill_lexical() first when it is missing.
 */
fs_value fn_uri(fs_query *q, fs_value a)
{
    if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return a;
    }

    const int is_bnode = (a.valid & fs_valid_bit(FS_V_RID)) && FS_IS_BNODE(a.rid);
    if (!is_bnode) {
        /* Non-bnode: build the URI straight from the lexical form. */
        if (!a.lex) {
            a = fs_value_fill_lexical(q, a);
        }
        return fs_value_uri(a.lex);
    }

    fs_value result = fs_value_blank();
    result.lex = g_strdup_printf("bnode:b%llx", FS_BNODE_NUM(a.rid));
    fs_query_add_freeable(q, result.lex);
    result.rid = fs_hash_uri_ignore_bnode(result.lex);
    result.valid = fs_valid_bit(FS_V_RID);
    result.attr = FS_RID_NULL;

    return result;
}
/**
 * Print a compact, single-line description of a value to stdout
 * (no trailing newline).
 *
 * Error values print "error" plus the lexical form in parentheses when
 * there is one. Otherwise a short type tag is printed, followed by each
 * of the rid / lexical / floating point / decimal / integer
 * representations that is present in the value.
 */
void fs_value_print(fs_value v)
{
    if (v.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        printf("error");
        if (v.lex) {
            printf("(%s)", v.lex);
        }
        return;
    }

    /* Pick the type tag first; NULL means "unknown attr, print it raw". */
    const char *tag = NULL;
    if (v.attr == fs_c.xsd_double) {
        tag = "db";
    } else if (v.attr == fs_c.xsd_float) {
        tag = "fl";
    } else if (v.attr == fs_c.xsd_decimal) {
        tag = "de";
    } else if (v.attr == fs_c.xsd_integer) {
        tag = "in";
    } else if (v.attr == fs_c.xsd_boolean) {
        tag = "bl";
    } else if (v.attr == fs_c.xsd_string) {
        tag = "st";
    } else if (v.attr == fs_c.xsd_datetime) {
        tag = "dt";
    } else if (v.attr == fs_c.empty || v.attr == FS_RID_NULL) {
        /* No datatype/language: classify by the RID instead. */
        if (v.rid == FS_RID_NULL) {
            tag = "NULL";
        } else if (FS_IS_BNODE(v.rid)) {
            tag = "bnode";
        } else if (FS_IS_URI(v.rid)) {
            tag = "uri";
        } else {
            tag = "plain";
        }
    }

    if (tag) {
        printf("%s", tag);
    } else {
        printf("attr:%llx", v.attr);
    }

    if (v.valid & fs_valid_bit(FS_V_RID)) {
        printf(" rid:%llx", v.rid);
    }
    if (v.lex) {
        printf(" l:%s", v.lex);
    }
    if (v.valid & fs_valid_bit(FS_V_FP)) {
        printf(" f:%f", v.fp);
    }
    if (v.valid & fs_valid_bit(FS_V_DE)) {
        char *decimal_text = fs_decimal_to_lex(&v.de);
        printf(" d:%s", decimal_text);
        free(decimal_text);
    }
    if (v.valid & fs_valid_bit(FS_V_IN)) {
        printf(" i:%lld", (long long)v.in);
    }
}
/**
 * Test whether a value is a blank node.
 *
 * Error values are returned unchanged. The result is a boolean value that
 * is true only when the value carries a valid, non-NULL RID which
 * FS_IS_BNODE() accepts. The query argument is not used.
 */
fs_value fn_is_blank(fs_query *q, fs_value a)
{
    if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return a;
    }

    const int has_real_rid =
        (a.valid & fs_valid_bit(FS_V_RID)) && a.rid != FS_RID_NULL;
    if (has_real_rid) {
        return fs_value_boolean(FS_IS_BNODE(a.rid));
    }

    return fs_value_boolean(0);
}
/**
 * Resolve a RID into a resource record.
 *
 * out->rid is set to rid and out->attr to FS_RID_NULL. Blank nodes are
 * answered locally with a newly allocated "_:b<hex>" lexical form
 * (g_strdup_printf) and a return value of 0. Every other RID is looked up
 * through fs_rhash_get() on the backend's resource hash, whose return
 * value is passed straight back. The segment argument is not used here.
 */
int fs_resolve_rid(fs_backend *be, fs_segment segment, fs_rid rid, fs_resource *out)
{
    out->rid = rid;
    out->attr = FS_RID_NULL;

    if (!FS_IS_BNODE(rid)) {
        return fs_rhash_get(be->res, out);
    }

    /* Blank nodes have no stored lexical form; synthesise one. */
    out->lex = g_strdup_printf("_:b%llx", rid);

    return 0;
}
/**
 * Replace every blank-node RID in r, in place, with the id returned by
 * fs_bnode_id() for a label derived from the old RID ("f_" followed by the
 * RID as 16 hex digits).
 *
 * Entries that are not blank nodes, or that equal FS_RID_NULL, are left
 * untouched.
 */
static void map_bnodes(struct update_context *uc, fs_rid_vector *r)
{
    for (int i = 0; i < r->length; i++) {
        if (!FS_IS_BNODE(r->data[i]) || r->data[i] == FS_RID_NULL) {
            continue;
        }

        /* "f_" + 16 hex digits + NUL = 19 bytes; snprintf keeps the write
         * bounded even if the format is ever changed. */
        char label[32];
        snprintf(label, sizeof label, "f_%016llx", r->data[i]);

        raptor_term_blank_value bnode;
        bnode.string = (unsigned char *)label;
        /* NOTE(review): length is passed as 0, as before — presumably
         * fs_bnode_id() only reads the NUL-terminated string; confirm. */
        bnode.string_len = 0;

        r->data[i] = fs_bnode_id(uc->link, bnode);
    }
}
/**
 * Return 1 when v is a plain literal or an xsd:string literal, else 0.
 *
 * A value qualifies when it is not an error, its RID is neither a blank
 * node nor a URI, and its attr is either fs_c.empty or fs_c.xsd_string.
 */
int fs_is_plain_or_string(fs_value v)
{
    if (fs_is_error(v)) {
        return 0;
    }

    const int is_node = FS_IS_BNODE(v.rid) || FS_IS_URI(v.rid);
    const int attr_ok = (v.attr == fs_c.empty) || (v.attr == fs_c.xsd_string);

    return !is_node && attr_ok;
}
/**
 * Convert a value to a plain literal holding its lexical form.
 *
 * Error values are returned unchanged; blank nodes (valid RID accepted by
 * FS_IS_BNODE) yield an FS_ERROR_INVALID_TYPE error. When the value has no
 * lexical form yet it is obtained through fs_value_fill_lexical().
 */
fs_value fn_str(fs_query *q, fs_value a)
{
    if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        return a;
    }

    const int is_bnode = (a.valid & fs_valid_bit(FS_V_RID)) && FS_IS_BNODE(a.rid);
    if (is_bnode) {
        return fs_value_error(FS_ERROR_INVALID_TYPE, NULL);
    }

    if (a.lex) {
        return fs_value_plain(a.lex);
    }

    /* No lexical form cached on the value: look it up, then attach it. */
    fs_value result = fs_value_plain(NULL);
    a = fs_value_fill_lexical(q, a);
    result.lex = a.lex;

    return result;
}
/**
 * Print a RID vector to `out`, one entry per line, with each RID resolved
 * to its lexical form through fs_resolve_rid().
 *
 * Blank nodes are printed bare, URIs in <angle brackets>, literals in
 * "double quotes" and anything else between question marks. A NULL vector
 * prints a single "(null)" line. The `flags` argument is not used.
 */
void fs_rid_vector_print_resolved(fs_backend *be, fs_rid_vector *v, int flags, FILE *out)
{
    if (!v) {
        fprintf(out, "RID vector: (null)\n");
        return;
    }

    fprintf(out, "RID vector (%d items)\n", v->length);
    for (int i = 0; i < v->length; i++) {
        fs_resource res;
        /* The resolver's status is not checked, so make sure a lookup
         * that fails without filling in `lex` cannot hand an
         * indeterminate pointer to fprintf's %s. */
        res.lex = NULL;
        fs_resolve_rid(be, v->data[i] & 0x7, v->data[i], &res);
        const char *lex = res.lex ? res.lex : "(null)";

        if (FS_IS_BNODE(v->data[i])) {
            fprintf(out, "%4d %llx %s\n", i, v->data[i], lex);
        } else if (FS_IS_URI(v->data[i])) {
            fprintf(out, "%4d %llx <%s>\n", i, v->data[i], lex);
        } else if (FS_IS_LITERAL(v->data[i])) {
            fprintf(out, "%4d %llx \"%s\"\n", i, v->data[i], lex);
        } else {
            fprintf(out, "%4d %llx ?%s?\n", i, v->data[i], lex);
        }
        /* NOTE(review): res.lex is never released here. For blank nodes
         * fs_resolve_rid() allocates it; ownership for other RIDs depends
         * on fs_rhash_get() — confirm before adding a free. */
    }
}
/**
 * Queue one resource (rid, lexical form, attr) in the per-segment import
 * buffer, flushing that segment's buffer through fsp_res_import() once it
 * holds RES_BUF_SIZE entries.
 *
 * Blank nodes, RIDs already recorded in nodecache, and resources without
 * a lexical form are skipped. Lexical forms shorter than RES_BUF_SIZE are
 * copied into the matching lex_tmp slot; longer ones are duplicated on
 * the heap and released after the flush.
 *
 * When FS_DRYRUN_RESOURCES is set in `dryrun` the flush skips the import
 * call but still empties the buffer.
 *
 * Returns 0 when the resource was buffered (and any flush it triggered
 * succeeded), 1 when it was skipped or when the import failed.
 */
static int buffer_res(fsp_link *link, const int segments, fs_rid r, char *lex, fs_rid attr, int dryrun)
{
    int seg = FS_RID_SEGMENT(r, segments);

    if (FS_IS_BNODE(r)) {
        return 1;
    }
    if (nodecache[r & CACHE_MASK] == r) {
        return 1;
    }
    if (!lex) {
        return 1;
    }

    nodecache[r & CACHE_MASK] = r;
    res_buffer[seg][res_pos[seg]].rid = r;
    res_buffer[seg][res_pos[seg]].attr = attr;
    if (strlen(lex) < RES_BUF_SIZE) {
        strcpy(lex_tmp[seg][res_pos[seg]], lex);
        res_buffer[seg][res_pos[seg]].lex = lex_tmp[seg][res_pos[seg]];
    } else {
        res_buffer[seg][res_pos[seg]].lex = g_strdup(lex);
    }

    if (++res_pos[seg] != RES_BUF_SIZE) {
        return 0;
    }

    /* Buffer full: push it to the backend unless this is a dry run. */
    int failed = 0;
    if (!(dryrun & FS_DRYRUN_RESOURCES) &&
        fsp_res_import(link, seg, res_pos[seg], res_buffer[seg])) {
        fs_error(LOG_ERR, "resource import failed");
        failed = 1;
    }

    /* Release and reset even when the import failed. Returning early
     * would leave res_pos[seg] == RES_BUF_SIZE, so the next call would
     * write past the end of the buffer and the flush condition would
     * never trigger again; the heap copies would also leak. */
    for (int i = 0; i < res_pos[seg]; i++) {
        if (res_buffer[seg][i].lex != lex_tmp[seg][i]) {
            g_free(res_buffer[seg][i].lex); /* allocated with g_strdup */
            res_buffer[seg][i].lex = NULL;
        }
    }
    res_pos[seg] = 0;

    return failed;
}
/**
 * Build an fs_value from a resolved resource.
 *
 * A resource whose RID is FS_RID_NULL yields fs_value_rid(FS_RID_NULL).
 * Otherwise the lexical form is interpreted according to the resource's
 * attr: integer, float/double (both cast as xsd:double), decimal, boolean
 * ("true" or "1" are true, anything else false) and dateTime get typed
 * values; if that conversion produces an error the value falls back to a
 * blank value carrying just the lexical form.
 *
 * The result always carries the resource's RID; its attr is fs_c.empty
 * for URIs and blank nodes and the resource's attr otherwise. The RID and
 * ATTR valid bits are set.
 */
fs_value fs_value_resource(fs_query *q, fs_resource *r)
{
    fs_value v = fs_value_blank();
    v.lex = r->lex;

    if (r->rid == FS_RID_NULL) {
        return fs_value_rid(FS_RID_NULL);
    }

    const fs_rid datatype = r->attr;
    if (datatype == fs_c.xsd_integer) {
        v = fn_cast_intl(q, v, fs_c.xsd_integer);
    } else if (datatype == fs_c.xsd_float || datatype == fs_c.xsd_double) {
        v = fn_cast_intl(q, v, fs_c.xsd_double);
    } else if (datatype == fs_c.xsd_decimal) {
        v = fn_cast_intl(q, v, fs_c.xsd_decimal);
    } else if (datatype == fs_c.xsd_boolean) {
        const int truthy =
            strcmp(r->lex, "true") == 0 || strcmp(r->lex, "1") == 0;
        v = fs_value_boolean(truthy ? 1 : 0);
    } else if (datatype == fs_c.xsd_datetime) {
        v = fs_value_datetime_from_string(r->lex);
    }

    /* A failed typed conversion degrades to an untyped lexical value. */
    if (fs_is_error(v)) {
        v = fs_value_blank();
        v.lex = r->lex;
    }

    v.rid = r->rid;
    if (FS_IS_URI(v.rid) || FS_IS_BNODE(v.rid)) {
        v.attr = fs_c.empty;
    } else {
        v.attr = r->attr;
    }
    v.valid |= fs_valid_bit(FS_V_RID) | fs_valid_bit(FS_V_ATTR);

    return v;
}
/* Ordering class of a value for fs_order_by_cmp():
 * 0 = valid RID equal to FS_RID_NULL, 1 = blank node, 2 = URI,
 * 3 = everything else (including values without a valid RID). */
static int fs_order_by_class(fs_value v)
{
    if (!(v.valid & fs_valid_bit(FS_V_RID))) {
        return 3;
    }
    if (v.rid == FS_RID_NULL) {
        return 0;
    }
    if (FS_IS_BNODE(v.rid)) {
        return 1;
    }
    if (FS_IS_URI(v.rid)) {
        return 2;
    }
    return 3;
}

/**
 * Three-way comparison used for ordering values.
 *
 * Values sort by class first: NULL RIDs, then blank nodes, then URIs, then
 * everything else. Within a class:
 *  - NULLs compare equal;
 *  - blank nodes compare by RID;
 *  - URIs compare by strcmp() of their lexical forms;
 *  - other values compare with fn_equal() / fn_less_than(); when
 *    fn_less_than() reports a type error the lexical forms are compared
 *    with strcmp() if both are present, otherwise the values tie.
 *
 * Returns a negative, zero or positive number.
 */
int fs_order_by_cmp(fs_value va, fs_value vb)
{
    const int class_a = fs_order_by_class(va);
    const int class_b = fs_order_by_class(vb);

    if (class_a != class_b) {
        return class_a < class_b ? -1 : 1;
    }

    switch (class_a) {
    case 0:
        return 0;
    case 1:
        return (va.rid > vb.rid) - (va.rid < vb.rid);
    case 2:
        return strcmp(va.lex, vb.lex);
    default:
        break;
    }

    fs_value verdict = fn_equal(NULL, va, vb);
    if (!(verdict.valid & fs_valid_bit(FS_V_TYPE_ERROR)) && verdict.in) {
        return 0;
    }

    verdict = fn_less_than(NULL, va, vb);
    if (verdict.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
        /* Incomparable types: fall back to the lexical forms. */
        /* TODO check for plain v's typed */
        if (va.lex && vb.lex) {
            return strcmp(va.lex, vb.lex);
        }
        return 0;
    }

    return verdict.in ? -1 : 1;
}
xmlChar *get_uri(fsp_link *link, fs_rid rid) { if (cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); return (xmlChar *) resource.lex; } xmlChar *get_attr(fsp_link *link, fs_rid rid) { if (attr_cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) attr_cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); memcpy(&attr_cache[rid & ATTR_CACHE_MASK], &resource, sizeof(fs_resource)); return (xmlChar *) resource.lex; } xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr) { if (cache[rid & CACHE_MASK].rid == rid) { *attr = cache[rid & CACHE_MASK].attr; return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); *attr = resource.attr; return (xmlChar *) resource.lex; } void resolve_triples(fsp_link *link, fs_rid_vector **rids) { int quads = rids[0]->length; fs_rid_vector *todo[segments]; fs_segment segment; for (segment = 0; segment < segments; ++segment) { todo[segment] = fs_rid_vector_new(0); } for (int c = 0; c < 3; ++c) { for (int k = 0; k < quads; ++k) { const fs_rid rid = rids[c]->data[k]; if (FS_IS_BNODE(rid) || cache[rid & CACHE_MASK].rid == rid) continue; fs_rid_vector_append(todo[FS_RID_SEGMENT(rid, segments)], rid); cache[rid & CACHE_MASK].rid = rid; /* well, it will be soon */ } } int length[segments]; fs_resource *resources[segments]; for (segment = 0; segment < segments; ++segment) { length[segment] = todo[segment]->length; resources[segment] = calloc(length[segment], sizeof(fs_resource)); } fsp_resolve_all(link, todo, resources); for (segment = 0; segment < segments; ++segment) 
{ fs_resource *res = resources[segment]; for (int k = 0; k < length[segment]; ++k) { free(cache[res[k].rid & CACHE_MASK].lex); memcpy(&cache[res[k].rid & CACHE_MASK], &res[k], sizeof(fs_resource)); } fs_rid_vector_free(todo[segment]); free(resources[segment]); } } void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml) { fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fs_rid_vector one = { .length = 1, .size = 1, .data = &model }; fs_rid_vector **results; double then; /* for time keeping */ then = fs_time(); fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT); time_bind_first += (fs_time() - then); while (results != NULL) { long length = results[0]->length; if (length == 0) break; then = fs_time(); resolve_triples(link, results); time_resolving += (fs_time() - then); then = fs_time(); for (int k = 0; k < length; ++k) { xmlTextWriterStartElement(xml, (xmlChar *) "triple"); for (int r = 0; r < 3; ++r) { fs_rid rid = results[r]->data[k]; if (FS_IS_BNODE(rid)) { unsigned long long node = FS_BNODE_NUM(rid); xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node); } else if (FS_IS_URI(rid)) { xmlChar *uri = get_uri(link, rid); xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri); } else if (FS_IS_LITERAL(rid)) { fs_rid attr; xmlChar *lex = get_literal(link, rid, &attr); if (attr == fs_c.empty) { xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex); } else if (FS_IS_URI(attr)) { xmlChar *type = get_uri(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral"); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type); xmlTextWriterEndElement(xml); } else if (FS_IS_LITERAL(attr)) { xmlChar *lang = get_attr(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral"); xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterEndElement(xml); } } } 
xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); } time_write_out += (fs_time() - then); fs_rid_vector_free(results[0]); fs_rid_vector_free(results[1]); fs_rid_vector_free(results[2]); free(results); then = fs_time(); fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT); time_bind_next += (fs_time() - then); } fsp_bind_done_all(link); } void dump_trix(fsp_link *link, xmlTextWriterPtr xml) { fs_rid_vector **models; fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models); fs_rid_vector_sort(models[0]); fs_rid_vector_uniq(models[0], 1); long length = models[0]->length; for (int k = 0; k < length; ++k) { fs_rid model = models[0]->data[k]; xmlChar *model_uri = get_uri(link, model); xmlTextWriterStartElement(xml, (xmlChar *) "graph"); if (FS_IS_URI(model)) { xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri); } else { fs_error(LOG_WARNING, "model %lld is not a URI", model); } dump_model(link, model, xml); xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out); } } void dump_file(fsp_link *link, char *filename) { xmlTextWriterPtr xml = xmlNewTextWriterFilename(filename, TRUE); if (!xml) { fs_error(LOG_ERR, "Couldn't write output file, giving up"); exit(4); } xmlTextWriterStartDocument(xml, NULL, NULL, NULL); xmlTextWriterStartElement(xml, (xmlChar *) "TriX"); dump_trix(link, xml); xmlTextWriterEndDocument(xml); /* also closes TriX */ xmlFreeTextWriter(xml); } int main(int argc, char *argv[]) { char *password = fsp_argv_password(&argc, argv); if (argc != 3) { fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]); exit(1); } fsp_link *link = fsp_open_link(argv[1], password, FS_OPEN_HINT_RO); if (!link) { 
fs_error (LOG_ERR, "couldn't connect to “%s”", argv[1]); exit(2); } fs_hash_init(fsp_hash_type(link)); segments = fsp_link_segments(link); dump_file(link, argv[2]); fsp_close_link(link); }