END_TEST START_TEST (check_fs_rid_vector_sort) { fs_rid_vector *v = fs_rid_vector_new(0); fail_unless(v != NULL, "fs_rid_vector is NULL"); int i=0; for (i=100; i < 1e4; i++) { if (i % 2) { fs_rid_vector_append(v, i); fs_rid_vector_append(v, i+3); fs_rid_vector_append(v, i-10); } else { fs_rid_vector_append(v, i-1); fs_rid_vector_append(v, i+5); fs_rid_vector_append(v, i); } } fs_rid_vector_sort(v); fail_if(fs_rid_vector_length(v) != ((1e4 - 100) *3), "Length fail after sort"); for (i=100; i < 1e4; i++) { fail_if ( !fs_rid_vector_contains(v, i), "Contain failed after sort"); } for (i=0;i<fs_rid_vector_length(v) -1; i++) { fail_if (v->data[i] > v->data[i+1], "Sort does not match."); } fs_rid_vector_free(v); }
/*
 * Print a diagnostic dump of an mhash to `out`.
 *
 * Writes the filename, header count and size, then scans every entry stored
 * after the header in mh->fd. Entries with a non-zero value are counted and,
 * when verbosity > 0, printed one per line. Consistency problems are reported
 * as "ERROR:" lines on `out`:
 *   - the same model rid in two consecutive used entries,
 *   - header count differing from the number of used entries scanned,
 *   - any model rid occurring more than once overall.
 *
 * mh        - mhash to print; a NULL pointer is logged and ignored
 * out       - stream receiving the report
 * verbosity - values > 0 additionally list every used entry
 *
 * Note: this moves the file offset of mh->fd.
 */
void fs_mhash_print(fs_mhash *mh, FILE *out, int verbosity)
{
    if (!mh) {
        fs_error(LOG_CRIT, "tried to print NULL mhash");

        return;
    }

    fs_mhash_entry e;
    fs_rid_vector *models = fs_rid_vector_new(0);
    fs_rid last_model = FS_RID_NULL;
    int entry = 0;
    int count = 0;

    fprintf(out, "mhash %s\n", mh->filename);
    fprintf(out, "  count: %d\n", mh->count);
    fprintf(out, "  size: %d\n", mh->size);
    fprintf(out, "\n");

    /* entries start immediately after the on-disk header */
    lseek(mh->fd, sizeof(struct mhash_header), SEEK_SET);
    while (read(mh->fd, &e, sizeof(e)) == sizeof(e)) {
        if (e.val) {
            count++;
            if (verbosity > 0) {
                fprintf(out, "%8d %016llx %8d\n", entry, e.rid, e.val);
            }
            fs_rid_vector_append(models, e.rid);
            if (e.rid == last_model) {
                fprintf(out, "ERROR: %s model %016llx appears multiple times\n",
                        mh->filename, e.rid);
            }
            last_model = e.rid;
        }
        entry++;
    }

    if (mh->count != count) {
        fprintf(out, "ERROR: %s header count %d != scanned count %d\n",
                mh->filename, mh->count, count);
    }

    /* if de-duplicating the sorted rids shortens the list, a model repeats */
    int oldlength = models->length;
    fs_rid_vector_sort(models);
    fs_rid_vector_uniq(models, 0);
    if (models->length != oldlength) {
        fprintf(out, "ERROR: %s some models appear > 1 time\n",
                mh->filename);
    }

    /* the scratch vector is only needed for the duplicate check */
    fs_rid_vector_free(models);
}
/* Read runtime.info and metadata.nt to fill in info for a kb.
 * Leave ipaddr unset, caller can set if needed.
 *
 * ki      - kb info struct to fill in; ki->name is set to a strdup()ed copy
 *           of kb_name, and ki->p_segments_data is malloc()ed here
 * kb_name - name of the kb to look up
 * err     - out parameter, see below
 *
 * Returns 0 on normal operation, -1 on error
 *
 * err is set to one of:
 *  ADM_ERR_SEE_ERRNO     - check errno to find error
 *  ADM_ERR_GENERIC       - usually std lib error where errno not set
 *  ADM_ERR_KB_NOT_EXISTS - KB requested does not exist
 *  ADM_ERR_KB_GET_INFO   - KB exists, but runtime/metadata unreadable
 *  ADM_ERR_OK            - no errors
 *
 * Note that a failure to open runtime.info is logged but not fatal: the
 * metadata is still read afterwards and determines the final value of err.
 */
int fsab_kb_info_init(fsa_kb_info *ki, const unsigned char *kb_name, int *err)
{
    fsa_error(LOG_DEBUG, "init kb info for '%s'", kb_name);

    FILE *ri_file;
    int len, rv;
    char *path;
    fs_metadata *md;
    struct stat info;

    ki->name = (unsigned char *)strdup((char *)kb_name);
    if (ki->name == NULL) {
        errno = ENOMEM;
        *err = ADM_ERR_SEE_ERRNO;
        return -1;
    }

    /* check if kb exists; the -2 accounts for the "%s" in the format */
    len = (strlen(FS_KB_DIR)-2) + strlen((char *)kb_name) + 1;
    path = (char *)malloc(len * sizeof(char));
    if (path == NULL) {
        errno = ENOMEM;
        *err = ADM_ERR_SEE_ERRNO;
        return -1;
    }

    /* generate full path to kb dir, never writing past the allocation */
    rv = snprintf(path, (size_t)len, FS_KB_DIR, kb_name);
    if (rv < 0 || rv >= len) {
        *err = ADM_ERR_GENERIC;
        fsa_error(LOG_DEBUG, "sprintf failed");
        free(path);
        return -1;
    }

    rv = stat(path, &info);
    /* capture errno before free()/logging get a chance to change it */
    int stat_errno = errno;
    free(path);
    if (rv == -1) {
        if (stat_errno == ENOENT) {
            /* not an error, return empty kb info, but let caller know */
            fsa_error(LOG_DEBUG, "kb '%s' does not exist", kb_name);
            *err = ADM_ERR_KB_NOT_EXISTS;
            return 0;
        }
        else {
            fsa_error(LOG_DEBUG, "stat error for kb '%s': %s",
                      kb_name, strerror(stat_errno));
            errno = stat_errno;
            *err = ADM_ERR_SEE_ERRNO;
            return -1;
        }
    }

    /* alloc mem for string path to runtime.info */
    len = (strlen(FS_RI_FILE)-2) + strlen((char *)kb_name) + 1;
    path = (char *)malloc(len * sizeof(char));
    if (path == NULL) {
        errno = ENOMEM;
        *err = ADM_ERR_SEE_ERRNO;
        return -1;
    }

    /* generate full path to runtime.info */
    rv = snprintf(path, (size_t)len, FS_RI_FILE, kb_name);
    if (rv < 0 || rv >= len) {
        *err = ADM_ERR_GENERIC;
        fsa_error(LOG_DEBUG, "sprintf failed");
        free(path);
        return -1;
    }

    /* attempt to open file for reading, ignore failures, but log them */
    ri_file = fopen(path, "r");
    if (ri_file == NULL) {
        fsa_error(LOG_ERR, "failed to read runtime info file at '%s': %s",
                  path, strerror(errno));
        *err = ADM_ERR_KB_GET_INFO;
        free(path);
    }
    else {
        free(path);

        /* check lock on file, and ignore if not locked - info is stale */
        struct flock ri_lock;
        int fd = fileno(ri_file);

        ri_lock.l_type = F_WRLCK;    /* write lock */
        ri_lock.l_whence = SEEK_SET; /* l_start begins at start of file */
        ri_lock.l_start = 0;         /* offset from whence */
        ri_lock.l_len = 0;           /* until EOF */

        /* F_GETLK only queries: it reports a conflicting lock, if any */
        rv = fcntl(fd, F_GETLK, &ri_lock);
        if (rv == -1) {
            fsa_error(LOG_CRIT, "fnctl locking error: %s", strerror(errno));
            fclose(ri_file);
            *err = ADM_ERR_KB_GET_INFO;
            return -1;
        }

        if (ri_lock.l_type == F_WRLCK) {
            /* file locked, so use info */
            int port, pid;

            ki->pid = ri_lock.l_pid;

            /* both fields must be parsed, otherwise port is uninitialised */
            rv = fscanf(ri_file, "%d %d", &pid, &port);
            if (rv != 2) {
                fsa_error(LOG_CRIT,
                          "bad data in runtime info file, fscanf failed");
                fclose(ri_file);
                *err = ADM_ERR_KB_GET_INFO;
                return -1;
            }
            else {
                /* file locked and contains running port and pid */
                ki->port = port;
                ki->status = KB_STATUS_RUNNING;
            }
        }
        else if (ri_lock.l_type == F_UNLCK) {
            /* file readable, but not locked */
            ki->status = KB_STATUS_STOPPED;
        }

        fclose(ri_file);
    }

    /* pull data from metadata.nt */
    md = fs_metadata_open((char *)kb_name);
    if (md != NULL) {
        ki->num_segments =
            (uint16_t)atoi(fs_metadata_get_string(md, FS_MD_SEGMENTS, "0"));

        fs_rid_vector *vec = fs_metadata_get_int_vector(md, FS_MD_SEGMENT_P);
        fs_rid_vector_sort(vec);

        /* segment ID and max segments should be 256, but allow 65536 to
           allow for value to be upped in #define */
        ki->p_segments_len = (uint16_t)vec->length;
        ki->p_segments_data =
            (uint16_t *)malloc(ki->p_segments_len * sizeof(uint16_t));
        if (ki->p_segments_data == NULL && ki->p_segments_len > 0) {
            fs_rid_vector_free(vec);
            fs_metadata_close(md);
            errno = ENOMEM;
            *err = ADM_ERR_SEE_ERRNO;
            return -1;
        }

        /* bound by the (possibly truncated) stored length, which is what
           the buffer above was sized for */
        for (int i = 0; i < ki->p_segments_len; i++) {
            ki->p_segments_data[i] = (uint16_t)vec->data[i];
        }

        fs_rid_vector_free(vec);
        fs_metadata_close(md);

        fsa_error(LOG_DEBUG, "metadata.nt read for kb %s", kb_name);
        *err = ADM_ERR_OK;
    }
    else {
        fsa_error(LOG_ERR, "unable to read metadata.nt for kb %s", kb_name);
        *err = ADM_ERR_KB_GET_INFO;
    }

    return 0;
}
xmlChar *get_uri(fsp_link *link, fs_rid rid) { if (cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); return (xmlChar *) resource.lex; } xmlChar *get_attr(fsp_link *link, fs_rid rid) { if (attr_cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) attr_cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); memcpy(&attr_cache[rid & ATTR_CACHE_MASK], &resource, sizeof(fs_resource)); return (xmlChar *) resource.lex; } xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr) { if (cache[rid & CACHE_MASK].rid == rid) { *attr = cache[rid & CACHE_MASK].attr; return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); *attr = resource.attr; return (xmlChar *) resource.lex; } void resolve_triples(fsp_link *link, fs_rid_vector **rids) { int quads = rids[0]->length; fs_rid_vector *todo[segments]; fs_segment segment; for (segment = 0; segment < segments; ++segment) { todo[segment] = fs_rid_vector_new(0); } for (int c = 0; c < 3; ++c) { for (int k = 0; k < quads; ++k) { const fs_rid rid = rids[c]->data[k]; if (FS_IS_BNODE(rid) || cache[rid & CACHE_MASK].rid == rid) continue; fs_rid_vector_append(todo[FS_RID_SEGMENT(rid, segments)], rid); cache[rid & CACHE_MASK].rid = rid; /* well, it will be soon */ } } int length[segments]; fs_resource *resources[segments]; for (segment = 0; segment < segments; ++segment) { length[segment] = todo[segment]->length; resources[segment] = calloc(length[segment], sizeof(fs_resource)); } fsp_resolve_all(link, todo, resources); for (segment = 0; segment < segments; ++segment) 
{ fs_resource *res = resources[segment]; for (int k = 0; k < length[segment]; ++k) { free(cache[res[k].rid & CACHE_MASK].lex); memcpy(&cache[res[k].rid & CACHE_MASK], &res[k], sizeof(fs_resource)); } fs_rid_vector_free(todo[segment]); free(resources[segment]); } } void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml) { fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fs_rid_vector one = { .length = 1, .size = 1, .data = &model }; fs_rid_vector **results; double then; /* for time keeping */ then = fs_time(); fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT); time_bind_first += (fs_time() - then); while (results != NULL) { long length = results[0]->length; if (length == 0) break; then = fs_time(); resolve_triples(link, results); time_resolving += (fs_time() - then); then = fs_time(); for (int k = 0; k < length; ++k) { xmlTextWriterStartElement(xml, (xmlChar *) "triple"); for (int r = 0; r < 3; ++r) { fs_rid rid = results[r]->data[k]; if (FS_IS_BNODE(rid)) { unsigned long long node = FS_BNODE_NUM(rid); xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node); } else if (FS_IS_URI(rid)) { xmlChar *uri = get_uri(link, rid); xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri); } else if (FS_IS_LITERAL(rid)) { fs_rid attr; xmlChar *lex = get_literal(link, rid, &attr); if (attr == fs_c.empty) { xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex); } else if (FS_IS_URI(attr)) { xmlChar *type = get_uri(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral"); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type); xmlTextWriterEndElement(xml); } else if (FS_IS_LITERAL(attr)) { xmlChar *lang = get_attr(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral"); xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterEndElement(xml); } } } 
xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); } time_write_out += (fs_time() - then); fs_rid_vector_free(results[0]); fs_rid_vector_free(results[1]); fs_rid_vector_free(results[2]); free(results); then = fs_time(); fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT); time_bind_next += (fs_time() - then); } fsp_bind_done_all(link); } void dump_trix(fsp_link *link, xmlTextWriterPtr xml) { fs_rid_vector **models; fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models); fs_rid_vector_sort(models[0]); fs_rid_vector_uniq(models[0], 1); long length = models[0]->length; for (int k = 0; k < length; ++k) { fs_rid model = models[0]->data[k]; xmlChar *model_uri = get_uri(link, model); xmlTextWriterStartElement(xml, (xmlChar *) "graph"); if (FS_IS_URI(model)) { xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri); } else { fs_error(LOG_WARNING, "model %lld is not a URI", model); } dump_model(link, model, xml); xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out); } } void dump_file(fsp_link *link, char *filename) { xmlTextWriterPtr xml = xmlNewTextWriterFilename(filename, TRUE); if (!xml) { fs_error(LOG_ERR, "Couldn't write output file, giving up"); exit(4); } xmlTextWriterStartDocument(xml, NULL, NULL, NULL); xmlTextWriterStartElement(xml, (xmlChar *) "TriX"); dump_trix(link, xml); xmlTextWriterEndDocument(xml); /* also closes TriX */ xmlFreeTextWriter(xml); } int main(int argc, char *argv[]) { char *password = fsp_argv_password(&argc, argv); if (argc != 3) { fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]); exit(1); } fsp_link *link = fsp_open_link(argv[1], password, FS_OPEN_HINT_RO); if (!link) { 
fs_error (LOG_ERR, "couldn't connect to “%s”", argv[1]); exit(2); } fs_hash_init(fsp_hash_type(link)); segments = fsp_link_segments(link); dump_file(link, argv[2]); fsp_close_link(link); }