END_TEST START_TEST (check_fs_rid_vector_uniq) { fs_rid_vector *v = fs_rid_vector_new(0); fail_unless(v != NULL, "fs_rid_vector is NULL"); int i=0; for (i=100; i < 500; i++) { fs_rid_vector_append(v, i); fs_rid_vector_append(v, i); } fs_rid_vector_append(v,FS_RID_NULL); fail_if(fs_rid_vector_length(v) != (((500 - 100)*2) + 1), "fs_rid_vector_length failed"); fs_rid_vector_uniq(v, 0); //Not remove nulls fail_if(fs_rid_vector_length(v) != ((500 - 100) + 1), "fs_rid_vector_length failed"); for (i=1; i < fs_rid_vector_length(v) - 1; i++) { fail_if(v->data[i] == v->data[i-1], "unique values failed [%d %d]",v->data[i],v->data[i-1]); } fail_if(v->data[fs_rid_vector_length(v)-1] != FS_RID_NULL, "FS_RID_NULL isn't there."); v->data[123] = FS_RID_NULL; fs_rid_vector_uniq(v, 1); //remove nulls fail_if(fs_rid_vector_length(v) != ((500 - 100) -1), "fs_rid_vector_length failed"); fail_if(v->data[fs_rid_vector_length(v)-1] == FS_RID_NULL, "FS_RID_NULL should not be there %llx", v->data[fs_rid_vector_length(v)-1]); fail_if(v->data[123] == FS_RID_NULL, "FS_RID_NULL should not be there %llx", v->data[fs_rid_vector_length(v)-1]); fs_rid_vector_free(v); }
void fs_mhash_print(fs_mhash *mh, FILE *out, int verbosity) { if (!mh) { fs_error(LOG_CRIT, "tried to print NULL mhash"); return; } fs_mhash_entry e; fs_rid_vector *models = fs_rid_vector_new(0); fs_rid last_model = FS_RID_NULL; int entry = 0; int count = 0; fprintf(out, "mhash %s\n", mh->filename); fprintf(out, " count: %d\n", mh->count); fprintf(out, " size: %d\n", mh->size); fprintf(out, "\n"); lseek(mh->fd, sizeof(struct mhash_header), SEEK_SET); while (read(mh->fd, &e, sizeof(e)) == sizeof(e)) { if (e.val) { count++; if (verbosity > 0) { fprintf(out, "%8d %016llx %8d\n", entry, e.rid, e.val); } fs_rid_vector_append(models, e.rid); if (e.rid == last_model) { fprintf(out, "ERROR: %s model %016llx appears multiple times\n", mh->filename, e.rid); } last_model = e.rid; } entry++; } if (mh->count != count) { fprintf(out, "ERROR: %s header count %d != scanned count %d\n", mh->filename, mh->count, count); } int oldlength = models->length; fs_rid_vector_sort(models); fs_rid_vector_uniq(models, 0); if (models->length != oldlength) { fprintf(out, "ERROR: %s some models appear > 1 time\n", mh->filename); } }
xmlChar *get_uri(fsp_link *link, fs_rid rid) { if (cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); return (xmlChar *) resource.lex; } xmlChar *get_attr(fsp_link *link, fs_rid rid) { if (attr_cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) attr_cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); memcpy(&attr_cache[rid & ATTR_CACHE_MASK], &resource, sizeof(fs_resource)); return (xmlChar *) resource.lex; } xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr) { if (cache[rid & CACHE_MASK].rid == rid) { *attr = cache[rid & CACHE_MASK].attr; return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); *attr = resource.attr; return (xmlChar *) resource.lex; } void resolve_triples(fsp_link *link, fs_rid_vector **rids) { int quads = rids[0]->length; fs_rid_vector *todo[segments]; fs_segment segment; for (segment = 0; segment < segments; ++segment) { todo[segment] = fs_rid_vector_new(0); } for (int c = 0; c < 3; ++c) { for (int k = 0; k < quads; ++k) { const fs_rid rid = rids[c]->data[k]; if (FS_IS_BNODE(rid) || cache[rid & CACHE_MASK].rid == rid) continue; fs_rid_vector_append(todo[FS_RID_SEGMENT(rid, segments)], rid); cache[rid & CACHE_MASK].rid = rid; /* well, it will be soon */ } } int length[segments]; fs_resource *resources[segments]; for (segment = 0; segment < segments; ++segment) { length[segment] = todo[segment]->length; resources[segment] = calloc(length[segment], sizeof(fs_resource)); } fsp_resolve_all(link, todo, resources); for (segment = 0; segment < segments; ++segment) { fs_resource *res = resources[segment]; for (int k = 0; k < length[segment]; ++k) { free(cache[res[k].rid & CACHE_MASK].lex); memcpy(&cache[res[k].rid & CACHE_MASK], &res[k], sizeof(fs_resource)); } fs_rid_vector_free(todo[segment]); free(resources[segment]); } } void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml) { fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fs_rid_vector one = { .length = 1, .size = 1, .data = &model }; fs_rid_vector **results; double then; /* for time keeping */ then = fs_time(); fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT); time_bind_first += (fs_time() - then); while (results != NULL) { long length = results[0]->length; if (length == 0) break; then = fs_time(); resolve_triples(link, results); time_resolving += (fs_time() - then); then = fs_time(); for (int k = 0; k < length; ++k) { xmlTextWriterStartElement(xml, (xmlChar *) "triple"); for (int r = 0; r < 3; ++r) { fs_rid rid = results[r]->data[k]; if (FS_IS_BNODE(rid)) { unsigned long long node = FS_BNODE_NUM(rid); xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node); } else if (FS_IS_URI(rid)) { xmlChar *uri = get_uri(link, rid); xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri); } else if (FS_IS_LITERAL(rid)) { fs_rid attr; xmlChar *lex = get_literal(link, rid, &attr); if (attr == fs_c.empty) { xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex); } else if (FS_IS_URI(attr)) { xmlChar *type = get_uri(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral"); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type); xmlTextWriterEndElement(xml); } else if (FS_IS_LITERAL(attr)) { xmlChar *lang = get_attr(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral"); xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterEndElement(xml); } } } xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); } time_write_out += (fs_time() - then); fs_rid_vector_free(results[0]); fs_rid_vector_free(results[1]); fs_rid_vector_free(results[2]); free(results); then = fs_time(); fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT); time_bind_next += (fs_time() - then); } fsp_bind_done_all(link); } void dump_trix(fsp_link *link, xmlTextWriterPtr xml) { fs_rid_vector **models; fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models); fs_rid_vector_sort(models[0]); fs_rid_vector_uniq(models[0], 1); long length = models[0]->length; for (int k = 0; k < length; ++k) { fs_rid model = models[0]->data[k]; xmlChar *model_uri = get_uri(link, model); xmlTextWriterStartElement(xml, (xmlChar *) "graph"); if (FS_IS_URI(model)) { xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri); } else { fs_error(LOG_WARNING, "model %lld is not a URI", model); } dump_model(link, model, xml); xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out); } } void dump_file(fsp_link *link, char *filename) { xmlTextWriterPtr xml = xmlNewTextWriterFilename(filename, TRUE); if (!xml) { fs_error(LOG_ERR, "Couldn't write output file, giving up"); exit(4); } xmlTextWriterStartDocument(xml, NULL, NULL, NULL); xmlTextWriterStartElement(xml, (xmlChar *) "TriX"); dump_trix(link, xml); xmlTextWriterEndDocument(xml); /* also closes TriX */ xmlFreeTextWriter(xml); } int main(int argc, char *argv[]) { char *password = fsp_argv_password(&argc, argv); if (argc != 3) { fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]); exit(1); } fsp_link *link = fsp_open_link(argv[1], password, FS_OPEN_HINT_RO); if (!link) { fs_error (LOG_ERR, "couldn't connect to ā%sā", argv[1]); exit(2); } fs_hash_init(fsp_hash_type(link)); segments = fsp_link_segments(link); dump_file(link, argv[2]); fsp_close_link(link); }