/*
 * Stage `count` quads from `buffer` into the shared quad_buffer.
 *
 * Each quad is copied with its skip flag cleared. Every time the staging
 * buffer reaches QUAD_BUF_SIZE it is handed to fs_quad_import_commit();
 * the first time that happens the backend is switched to "pended" import
 * and FS_PENDED_LISTS lists named "pl-<hex digit>" are (re)created.
 * NOTE(review): this relies on fs_quad_import_commit() resetting quad_pos,
 * otherwise the outer loop cannot make progress -- confirm.
 *
 * Returns 0 on success, 1 if neither FS_BIND_BY_SUBJECT nor
 * FS_BIND_BY_OBJECT is set, 2 if FS_BIND_BY_OBJECT is set, 3 if `seg` is
 * out of range, or the non-zero status of a failed commit.
 * The elapsed time is added to be->in_time[seg].add_s on success only.
 */
int fs_quad_import(fs_backend *be, int seg, int flags, int count, fs_rid buffer[][4])
{
    if (!(flags & (FS_BIND_BY_SUBJECT | FS_BIND_BY_OBJECT))) {
        fs_error(LOG_ERR, "neither FS_BIND_BY_SUBJECT or FS_BIND_BY_OBJECT set");
        return 1;
    }
    if (flags & FS_BIND_BY_OBJECT) {
        fs_error(LOG_WARNING, "this backend doesn't use FS_BIND_BY_OBJECT");
        return 2;
    }
    if (seg < 0 || seg >= be->segments) {
        fs_error(LOG_ERR, "segment number %d out of range", seg);
        return 3;
    }

    const double started = fs_time();

    int next = 0;
    while (next < count) {
        /* copy until the input is exhausted or the staging buffer is full */
        while (next < count && quad_pos < QUAD_BUF_SIZE) {
            quad_buffer[quad_pos].skip = 0;
            for (int part = 0; part < 4; part++) {
                quad_buffer[quad_pos].quad[part] = buffer[next][part];
            }
            next++;
            quad_pos++;
        }

        if (quad_pos != QUAD_BUF_SIZE) {
            continue;
        }

        /* first overflow of the staging buffer: switch to pended lists */
        if (!be->pended_import) {
            be->pended_import = 1;
            for (int list = 0; list < FS_PENDED_LISTS; list++) {
                char label[256];
                snprintf(label, 255, "pl-%1x", list);
                be->pended[list] = fs_list_open(be, label, sizeof(fs_rid) * 4,
                                                O_CREAT | O_TRUNC | O_RDWR);
            }
        }

        const int status = fs_quad_import_commit(be, seg, flags, 0);
        if (status) {
            fs_error(LOG_CRIT, "quad commit failed");
            return status;
        }
    }

    be->in_time[seg].add_s += fs_time() - started;

    return 0;
}
/*
 * Look up the resources for every RID in `v`.
 *
 * out[i].rid is seeded with v->data[i] and the whole array is then passed
 * to fs_rhash_get_multi() on be->res, so `out` must hold at least
 * v->length entries. The call is counted and timed in
 * be->out_time[segment]. Returns whatever fs_rhash_get_multi() returned.
 */
int fs_resolve(fs_backend *be, fs_segment segment, fs_rid_vector *v, fs_resource *out)
{
    const double started = fs_time();

    for (int slot = 0; slot < v->length; slot++) {
        out[slot].rid = v->data[slot];
    }
    const int status = fs_rhash_get_multi(be->res, out, v->length);

    be->out_time[segment].resolve_count++;
    be->out_time[segment].resolve += fs_time() - started;

    return status;
}
/*
 * Set the access and modification times of `path`.
 *
 * The UTF-8 path is converted into the shared utf16_name buffer; a
 * conversion failure is returned unchanged. When `times` is NULL both
 * timestamps come from a single fs_time() reading. The modification time
 * supplies the FAT time and date, the access time supplies only a date.
 * The result of f_wsetaccmodtime() is mapped through hfat_to_efs_err().
 */
static int e_hfat_utime (const char *path, const struct fs_utimbuf *times)
{
  int status;
  uint32 access_secs, modify_secs;
  uint16 fat_mtime, fat_mdate, fat_adate;

  status = fs_convert->to_utf16 (path, utf16_name, HFAT_MAX_PATH_UTF16_ALLOW);
  if (status != ENOERR)
    return status;

  if (times == NULL)
    {
      /* no explicit times given: stamp both with the current time */
      modify_secs = fs_time ();
      access_secs = modify_secs;
    }
  else
    {
      access_secs = times->actime;
      modify_secs = times->modtime;
    }

  fat_mtime = fs_posix_to_fat_time (modify_secs);
  fat_mdate = fs_posix_to_fat_date (modify_secs);
  fat_adate = fs_posix_to_fat_date (access_secs);

  status = f_wsetaccmodtime (utf16_name, fat_mtime, fat_mdate, fat_adate);

  return hfat_to_efs_err (status);
}
int fs_quad_import_commit(fs_backend *be, int seg, int flags, int account) { if ((flags & (FS_BIND_BY_SUBJECT | FS_BIND_BY_OBJECT)) == 0) { fs_error(LOG_ERR, "neither FS_BIND_BY_SUBJECT or FS_BIND_BY_OBJECT set"); return 1; } if (flags & FS_BIND_BY_OBJECT) { fs_error(LOG_WARNING, "this backend doesn't use FS_BIND_BY_OBJECT"); return 2; } if (seg < 0 || seg >= be->segments) { fs_error(LOG_ERR, "segment number %d out of range", seg); return 3; } double then = fs_time(); TIME(NULL); if (be->pended_import) { for (int i=0; i<quad_pos; i++) { if (quad_buffer[i].skip) continue; const fs_rid pred = quad_buffer[i].quad[2]; const int pend_list = (pred >> 40) % FS_PENDED_LISTS; fs_list_add(be->pended[pend_list], quad_buffer[i].quad); } } else { for (int pass=0; pass<2; pass++) {
/*
 * Sort the rows of a binding table.
 *
 * Columns shorter than the table are first padded with FS_RID_NULL.
 * Column 0 is then overwritten with the row numbers 0 .. length-1 and
 * that index column is sorted with fs_qsort_r(), using qsort_r_cmp with
 * the table itself as context. If no column has its sort flag set a
 * warning is logged and nothing is reordered (the padding still happens).
 */
void fs_binding_sort(fs_binding *b)
{
    const int rows = fs_binding_length(b);
    int sort_columns = 0;

    for (fs_binding *col = b; col->name; col++) {
        if (col->sort) {
            sort_columns++;
        }
        if (col->vals->length < rows) {
            for (int have = col->vals->length; have < rows; have++) {
                fs_rid_vector_append(col->vals, FS_RID_NULL);
            }
        }
    }

    if (sort_columns == 0) {
        fs_error(LOG_WARNING, "fs_binding_sort() called with no sort columns set, ignoring");
        return;
    }

    /* rebuild the _ord column as the identity permutation */
    b[0].vals->length = 0;
    for (int row = 0; row < rows; row++) {
        fs_rid_vector_append(b[0].vals, row);
    }

    /* zero or one row is already sorted */
    if (rows <= 1) {
        return;
    }

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif
    /* ctxt could include other stuff for optimisations */
    struct sort_context ctxt = { b };
    fs_qsort_r(b[0].vals->data, rows, sizeof(fs_rid), qsort_r_cmp, &ctxt);
#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("sort took %f seconds\n", now - then);
#endif
}
/*
 * Flush the staged resources (res_buffer[0 .. res_pos)) into be->res.
 *
 * After fs_rhash_put_multi() the lexical strings held by the staging
 * buffer are released with g_free() and res_pos is reset to 0.
 *
 * seg      segment whose commit_r timer is charged
 * account  non-zero to add the elapsed time to be->in_time[seg].commit_r
 *
 * An out-of-range `seg` is logged; the buffer is still flushed (seg is
 * only used for accounting) but the timing update is skipped so that
 * be->in_time[] is never indexed out of bounds.
 *
 * Always returns 0.
 */
int fs_res_import_commit(fs_backend *be, int seg, int account)
{
    const int seg_valid = (seg >= 0 && seg < be->segments);
    if (!seg_valid) {
        fs_error(LOG_ERR, "segment number %d out of range", seg);
    }

    double then = fs_time();

    fs_rhash_put_multi(be->res, res_buffer, res_pos);
    for (int i = 0; i < res_pos; i++) {
        g_free(res_buffer[i].lex);
    }
    res_pos = 0;

    if (account && seg_valid) {
        double now = fs_time();
        be->in_time[seg].commit_r += now - then;
    }

    return 0;
}
/*
 * Drop rows that compare equal to the row directly before them.
 *
 * The table is copied with fs_binding_copy_and_clear() and `bi` is
 * refilled from that copy: row 0 is always kept, and each later row is
 * kept only when binding_row_compare() says it differs from its
 * predecessor. Every column's sort flag in the copy is set from its bound
 * flag before comparing. Tables with fewer than two rows are untouched.
 */
void fs_binding_uniq(fs_binding *bi)
{
    /* nothing to deduplicate; the code below needs at least one row */
    if (fs_binding_length(bi) < 2) {
        return;
    }

    fs_binding *src = fs_binding_copy_and_clear(bi);
    bi[0].vals->length = 0;

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif

    const int rows = fs_binding_length(src);
    int kept = 1;

    /* the first row always survives; also prime the bound/sort flags */
    for (int col = 1; src[col].name; col++) {
        fs_rid_vector_append(bi[col].vals, table_value(src, col, 0));
        bi[col].bound = src[col].bound;
        src[col].sort = src[col].bound;
    }

    for (int row = 1; row < rows; row++) {
        if (binding_row_compare(NULL, src, src, row, row-1, rows, rows) != 0) {
            for (int col = 1; src[col].name; col++) {
                fs_rid_vector_append(bi[col].vals, table_value(src, col, row));
            }
            kept++;
        }
    }

#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("uniq took %fs (%d->%d rows)\n", now-then, rows, kept);
    fs_binding_print(bi, stdout);
#endif

    fs_binding_free(src);
}
int main(int argc, char *argv[]) { fs_gnu_options(argc, argv, "<kbname> <noop|freq>\n"); char *password = fsp_argv_password(&argc, argv); if (argc != 3) { fprintf(stderr, "Usage: %s <kbname> <noop|freq>\n", basename(argv[0])); return 1; } fsp_syslog_enable(); fsp_link *link = fsp_open_link(argv[1], password, FS_OPEN_HINT_RO); if (!link) { fs_error(LOG_ERR, "couldn't connect to “%s”", argv[1]); return 2; } double then = fs_time(); if (fsp_no_op(link, 0)) { fs_error(LOG_ERR, "NO-OP failed\n"); return 3; } double now = fs_time(); if (!strcmp(argv[2], "noop")) { printf("NO-OP took %fs\n", now-then); return 0; } else if (!strcmp(argv[2], "freq")) { fs_query_state *qs = fs_query_init(link, NULL, NULL); fs_optimiser_freq_print(qs); } fsp_close_link(link); }
/*
 * Stage `count` resources from `buffer` into the shared res_buffer.
 *
 * rid and attr are copied as-is; the lexical form is duplicated with
 * g_strdup(), so the caller keeps ownership of buffer[i].lex. Whenever the
 * staging buffer reaches RES_BUF_SIZE it is flushed with
 * fs_res_import_commit(be, seg, 0).
 *
 * The elapsed time is added to be->in_time[seg].add_r, but only when
 * `seg` is a valid segment number, so a bad `seg` cannot write outside
 * the timing array. The index is a long to match `count`.
 *
 * Always returns 0.
 */
int fs_res_import(fs_backend *be, int seg, long count, fs_resource buffer[])
{
    double then = fs_time();

    long i = 0;
    while (i < count) {
        /* A quick_res_check() pre-filter around this copy was disabled
         * long ago (note dated 2009-07-06); the reason was not recorded. */
        for (; i < count && res_pos < RES_BUF_SIZE; i++) {
            res_buffer[res_pos].rid = buffer[i].rid;
            res_buffer[res_pos].attr = buffer[i].attr;
            res_buffer[res_pos].lex = g_strdup(buffer[i].lex);
            res_pos++;
        }

        if (res_pos == RES_BUF_SIZE) {
            fs_res_import_commit(be, seg, 0);
        }
    }

    if (seg >= 0 && seg < be->segments) {
        double now = fs_time();
        be->in_time[seg].add_r += now - then;
    }

    return 0;
}
int main(int argc, char *argv[]) { int verbosity = 0; int dryrun = 0; char *password = NULL; char *format = "auto"; FILE *msg = stderr; char *optstring = "am:M:vnf:"; int c, opt_index = 0, help = 0; int files = 0, adding = 0; char *kb_name = NULL; char *model[argc], *uri[argc]; char *model_default = NULL; password = fsp_argv_password(&argc, argv); static struct option long_options[] = { { "add", 0, 0, 'a' }, { "model", 1, 0, 'm' }, { "model-default", 1, 0, 'M' }, { "verbose", 0, 0, 'v' }, { "dryrun", 0, 0, 'n' }, { "no-resources", 0, 0, 'R' }, { "no-quads", 0, 0, 'Q' }, { "format", 1, 0, 'f' }, { "help", 0, 0, 'h' }, { "version", 0, 0, 'V' }, { 0, 0, 0, 0 } }; for (int i= 0; i < argc; ++i) { model[i] = NULL; } int help_return = 1; while ((c = getopt_long (argc, argv, optstring, long_options, &opt_index)) != -1) { if (c == 'm') { model[files++] = optarg; } else if (c == 'M') { model_default = optarg; } else if (c == 'v') { verbosity++; } else if (c == 'a') { adding = 1; } else if (c == 'n') { dryrun |= FS_DRYRUN_DELETE | FS_DRYRUN_RESOURCES | FS_DRYRUN_QUADS; } else if (c == 'R') { dryrun |= FS_DRYRUN_RESOURCES; } else if (c == 'Q') { dryrun |= FS_DRYRUN_QUADS; } else if (c == 'f') { format = optarg; } else if (c == 'h') { help = 1; help_return = 0; } else if (c == 'V') { printf("%s, built for 4store %s\n", argv[0], GIT_REV); exit(0); } else { help = 1; } } if (verbosity > 0) { if (dryrun & FS_DRYRUN_DELETE) { printf("warning: not deleting old model\n"); } if (dryrun & FS_DRYRUN_RESOURCES) { printf("warning: not importing resource nodes\n"); } if (dryrun & FS_DRYRUN_QUADS) { printf("warning: not importing quad graph\n"); } } files = 0; for (int k = optind; k < argc; ++k) { if (!kb_name) { kb_name = argv[k]; } else { if (strchr(argv[k], ':')) { uri[files] = g_strdup(argv[k]); } else { uri[files] = (char *)raptor_uri_filename_to_uri_string(argv[k]); } if (!model[files]) { if (!model_default) { model[files] = uri[files]; } else { model[files] = model_default; } } 
files++; } } raptor_world *rw = raptor_new_world(); if (help || !kb_name || files == 0) { fprintf(stdout, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stdout, "Usage: %s <kbname> <rdf file/URI> ...\n", argv[0]); fprintf(stdout, " -v --verbose increase verbosity (can repeat)\n"); fprintf(stdout, " -a --add add data to models instead of replacing\n"); fprintf(stdout, " -m --model specify a model URI for the next RDF file\n"); fprintf(stdout, " -M --model-default specify a model URI for all RDF files\n"); fprintf(stdout, " -f --format specify an RDF syntax for the import\n"); fprintf(stdout, "\n available formats are:\n"); for (unsigned int i=0; 1; i++) { const raptor_syntax_description *desc = raptor_world_get_parser_description(rw, i); if (!desc) { break; } fprintf(stdout, " %12s - %s\n", desc->names[0], desc->label); } exit(help_return); } fsp_syslog_enable(); fsplink = fsp_open_link(kb_name, password, FS_OPEN_HINT_RW); if (!fsplink) { fs_error (LOG_ERR, "couldn't connect to “%s”", kb_name); exit(2); } const char *features = fsp_link_features(fsplink); int has_o_index = !(strstr(features, "no-o-index")); /* tweak */ fs_hash_init(fsp_hash_type(fsplink)); const int segments = fsp_link_segments(fsplink); int total_triples = 0; fs_import_timing timing[segments]; for (int seg = 0; seg < segments; seg++) { fsp_get_import_times(fsplink, seg, &timing[seg]); } gettimeofday(&then, 0); if (fsp_start_import_all(fsplink)) { fs_error(LOG_ERR, "aborting import"); exit(3); } #if 0 printf("press enter\n"); char foo; read(0, &foo, 1); #endif fs_rid_vector *mvec = fs_rid_vector_new(0); for (int f= 0; f < files; ++f) { fs_rid muri = fs_hash_uri(model[f]); fs_rid_vector_append(mvec, muri); } if (!adding) { if (verbosity) { printf("removing old data\n"); fflush(stdout); } if (!(dryrun & FS_DRYRUN_DELETE)) { if (fsp_delete_model_all(fsplink, mvec)) { fs_error(LOG_ERR, "model delete failed"); return 1; } for (int i=0; i<mvec->length; i++) { if (mvec->data[i] == 
fs_c.system_config) { fs_import_reread_config(); } } } fsp_new_model_all(fsplink, mvec); } fs_rid_vector_free(mvec); gettimeofday(&then_last, 0); for (int f = 0; f < files; ++f) { if (verbosity) { printf("Reading <%s>\n", uri[f]); if (strcmp(uri[f], model[f])) { printf(" into <%s>\n", model[f]); } fflush(stdout); } fs_import(fsplink, model[f], uri[f], format, verbosity, dryrun, has_o_index, msg, &total_triples); if (verbosity) { fflush(stdout); } } double sthen = fs_time(); int ret = fs_import_commit(fsplink, verbosity, dryrun, has_o_index, msg, &total_triples); if (verbosity > 0) { printf("Updating index\n"); fflush(stdout); } fsp_stop_import_all(fsplink); if (verbosity > 0) { printf("Index update took %f seconds\n", fs_time()-sthen); } if (!ret) { gettimeofday(&now, 0); double diff = (now.tv_sec - then.tv_sec) + (now.tv_usec - then.tv_usec) * 0.000001; if (verbosity && total_triples > 0) { printf("Imported %d triples, average %d triples/s\n", total_triples, (int)((double)total_triples/diff)); fflush(stdout); } } if (verbosity > 1) { printf("seg add_q\tadd_r\t\tcommit_q\tcommit_r\tremove\t\trebuild\t\twrite\n"); long long *tics = fsp_profile_write(fsplink); for (int seg = 0; seg < segments; seg++) { fs_import_timing newtimes; fsp_get_import_times(fsplink, seg, &newtimes); printf("%2d: %f\t%f\t%f\t%f\t%f\t%f\t%f\n", seg, newtimes.add_s - timing[seg].add_s, newtimes.add_r - timing[seg].add_r, newtimes.commit_q - timing[seg].commit_q, newtimes.commit_r - timing[seg].commit_r, newtimes.remove - timing[seg].remove, newtimes.rebuild - timing[seg].rebuild, tics[seg] * 0.001); } } fsp_close_link(fsplink); raptor_free_world(rw); return 0; }
int main(int argc, char *argv[]) { char *password = fsp_argv_password(&argc, argv); int flags = 0, many = 0, all = 0; int seg = 0; /* deliberately using signed type */ fs_rid_vector *mrids= NULL, *srids= NULL, *prids= NULL, *orids= NULL; fs_rid_vector **result = NULL; if (argc < 7) { fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stderr, "Usage: %s <kbname> { many | all | seg# } <flags>\n", argv[0]); fprintf(stderr, " mrid-file srid-file prid-file orid-file [offset limit]\n"); fprintf(stderr, "For flags use FS_BIND_... symbols or a numeric value\n"); fprintf(stderr, "RID files are one RID per line\n"); exit(1); } char *kbname = argv[1]; if (!strcasecmp(argv[2], "many")) { many = 1; } else if (!strcasecmp(argv[2], "all")) { all = 1; } else { seg = atoi(argv[2]); } int param = 3; flags = strtol(argv[param], NULL, 0); if (flags == 0) { /* symbolic flags, hopefully */ while (param < argc) { const int len = sizeof(flag_name) / sizeof(char *); int k; for (k = 0; k < len; ++k) { if (!strcmp(flag_name[k], argv[param])) { flags |= flag_value[k]; break; } } if (k == len) break; param ++; } } else { param ++; /* done with the numeric flags then */ } if (argc < param + 4) { fprintf(stderr, "Wrong number of arguments\n"); exit(1); } mrids = rid_file(argv[param++]); srids = rid_file(argv[param++]); prids = rid_file(argv[param++]); orids = rid_file(argv[param++]); int limit, offset; if (argc == param ) { /* defaults */ limit = -1; offset = -1; } else if (argc > param + 2) { fprintf(stderr, "Wrong number of arguments\n"); exit(1); } else if (argc < param + 2) { fprintf(stderr, "Wrong number of arguments\n"); exit(1); } else { offset = atoi(argv[param]); limit = atoi(argv[param + 1]); } fsp_link *link = fsp_open_link(kbname, password, FS_OPEN_HINT_RO); if (!link) { fs_error (LOG_ERR, "couldn't connect to “%s”", argv[1]); exit(2); } segments = fsp_link_segments(link); if (seg < 0 || seg > segments) { fs_error (LOG_ERR, "Segment %d out of range (0-%u)", seg, 
segments); exit(1); } double then = fs_time(); int ans = 0; if (all) { ans = fsp_bind_limit_all(link, flags, mrids, srids, prids, orids, &result, offset, limit); } else if (many) { ans = fsp_bind_limit_many(link, flags, mrids, srids, prids, orids, &result, offset, limit); } else { ans = fsp_bind_limit(link, seg, flags, mrids, srids, prids, orids, &result, offset, limit); } double time_binding = fs_time() - then; if (ans != 0) exit(1); /* print results */ int cols = 0; for (int k = 0; k < 4; ++k) { if (flags & (1 << k)) cols++; } if (!result) { printf("NO MATCH found.\n"); } else if (cols == 0) { printf("MATCH found.\n"); } else if (result[0]) { int length = result[0]->length; if (flags & FS_BIND_MODEL) printf("-----Model------ "); if (flags & FS_BIND_SUBJECT) printf("----Subject----- "); if (flags & FS_BIND_PREDICATE) printf("----Predicate--- "); if (flags & FS_BIND_OBJECT) printf("-----Object-----"); putchar('\n'); for (int k = 0; k < length; ++k) { for (int c = 0; c < cols; ++c) { printf("%016llX ", result[c]->data[k]); } putchar('\n'); } } fprintf(stderr, "bind took %f seconds on client\n", time_binding); fs_query_timing times; if (all || many) { fprintf(stderr, "binding on all or many segments, times in seconds...\n"); for (int s = 0; s < segments; ++s) { fsp_get_query_times(link, s, ×); if (times.bind > 0.0f) { fprintf(stderr, "%d: %f\n", s, times.bind); } } fputc('\n', stderr); } else { fsp_get_query_times(link, seg, ×); fprintf(stderr, "binding segment %d took %f seconds\n", seg, times.bind); } fsp_close_link(link); }
/*
 * Interactive query prompt built on readline.
 *
 * A query is collected line by line until a line ends in "#EOQ" (or input
 * ends); an empty first line is ignored. The assembled text is added to
 * the history, a trailing newline is appended, and the query is executed
 * and printed in `result_format`. When show_timing is set, bind and total
 * execution times are printed as well.
 *
 * A query consisting only of the "#EOQ" terminator is skipped. Because the
 * newline has already been appended at that point, the comparison has to
 * be against "#EOQ\n"; comparing against "#EOQ" could never match.
 *
 * On end of input the function releases bu, link, rw and the query state,
 * saves the history and exits the process with status 0; it never returns.
 */
static void interactive(fsp_link *link, raptor_uri *bu, const char *result_format, int verbosity, int opt_level, int result_flags, int soft_limit, raptor_world *rw)
{
    char *query = NULL;

    /* fill out readline functions */
    load_history_dotfile();
    rl_attempted_completion_function = resource_completion;

    fs_query_state *qs = fs_query_init(link, NULL, NULL);
    qs->verbosity = verbosity;

    do {
        /* assemble query string */
        char *line = readline("4store>");
        if (!line) break; /* EOF */

        g_free(query);
        query = g_strdup(line);
        if (*line == '\0') {
            free(line);
            continue;
        }

        /* keep reading continuation lines until the terminator or EOF */
        while (line && !g_str_has_suffix(line, "#EOQ")) {
            free(line);
            line = readline(" >");
            if (line) {
                char *old = query;
                query = g_strjoin("\n", old, line, NULL);
                g_free(old);
            }
        }
        free(line);

        add_history(query);

        char *old = query;
        query = g_strconcat(old, "\n", NULL);
        g_free(old);

        /* process query string */
        double then = 0.0;
        if (query && strcmp(query, "#EOQ\n")) {
            if (show_timing) {
                then = fs_time();
            }
            fs_query *tq = fs_query_execute(qs, link, bu, query, result_flags,
                                            opt_level, soft_limit, 0);
            if (show_timing) {
                double now = fs_time();
                printf("# bind time %.3fs\n", now-then);
            }
            fs_query_results_output(tq, result_format, 0, stdout);
            fs_query_free(tq);

            if (result_format && !strcmp(result_format, "sparql")) {
                if (show_timing) {
                    double now = fs_time();
                    printf("<!-- EOR execution time %.3fs -->\n", now-then);
                } else {
                    printf("<!-- EOR -->\n");
                }
            } else {
                printf("#EOR\n");
                if (show_timing) {
                    double now = fs_time();
                    printf("# execution time %.3fs\n", now-then);
                }
            }
            fflush(stdout);
        }
    } while (query);

    /* the last assembled query is still held when EOF ends the loop */
    g_free(query);

    raptor_free_uri(bu);
    fsp_close_link(link);
    raptor_free_world(rw);
    save_history_dotfile();
    fs_query_cache_flush(qs, verbosity);
    fs_query_fini(qs);

    exit(0);
}
/*
 * Machine-driven query loop on stdin/stdout.
 *
 * Lines are appended to a fixed buffer of MAX_Q_SIZE bytes until a line
 * equal to "#EOQ\n" or "#END\n" (or EOF) is read. A non-empty query is
 * executed and its results written in `result_format`, followed by an
 * end-of-results marker. "#END\n" or EOF terminates the loop. When
 * show_timing is set, per-segment timing deltas against `timing[]` (one
 * entry per segment) and their totals are printed after each query.
 *
 * If a query does not fit in the buffer, reading stops, an error is
 * written to stderr, the partial query is NOT executed and the process
 * exits with status 1. Previously fgets() was then called with no room
 * left, returned without consuming input, and the loop never ended.
 *
 * The function releases bu, link, rw and the query state and exits the
 * process; it never returns.
 */
static void programatic_io(fsp_link *link, raptor_uri *bu, const char *query_lang, const char *result_format, fs_query_timing *timing, int verbosity, int opt_level, unsigned int result_flags, int soft_limit, raptor_world *rw)
{
    char query[MAX_Q_SIZE];
    char *pos;
    char *newl;
    int overflow = 0;

    const int segments = fsp_link_segments(link);

    fs_query_state *qs = fs_query_init(link, NULL, NULL);
    qs->verbosity = verbosity;

    do {
        pos = query;
        *query = '\0';

        /* assemble query string */
        do {
            const int room = (int)(query + MAX_Q_SIZE - pos - 1);
            if (room < 2) {
                /* fgets() could not store even one character */
                overflow = 1;
                newl = NULL;
                break;
            }
            newl = fgets(pos, room, stdin);
            if (newl) {
                pos += strlen(newl);
            }
        } while (newl && strcmp(newl, "#EOQ\n") && strcmp(newl, "#END\n"));

        if (overflow) {
            fprintf(stderr, "query too long for the input buffer, giving up\n");
            break;
        }

        /* process query string */
        if (*query && strcmp(query, "#EOQ\n") && strcmp(query, "#END\n")) {
            if (show_timing) {
                printf("Q: %s\n", query);
            }

            fs_query *tq = fs_query_execute(qs, link, bu, query, result_flags,
                                            opt_level, soft_limit, 0);
            fs_query_results_output(tq, result_format, 0, stdout);

            if (show_timing) {
                printf("# time: %f s\n", fs_time() - fs_query_start_time(tq));
                printf("seg bind\t(secs)\t\tprice\t(secs)\t\tresolve\t(secs)\t\twait (secs)\n");
                long long *tics = fsp_profile_write(link);
                fs_query_timing total_time = {0, 0, 0, 0, 0, 0};

                for (int seg = 0; seg < segments; seg++) {
                    fs_query_timing newtimes;
                    fsp_get_query_times(link, seg, &newtimes);

                    printf("%2d: %4d\t%f\t%4d\t%f\t%4d\t%f\t%f\n", seg,
                           newtimes.bind_count - timing[seg].bind_count,
                           newtimes.bind - timing[seg].bind,
                           newtimes.price_count - timing[seg].price_count,
                           newtimes.price - timing[seg].price,
                           newtimes.resolve_count - timing[seg].resolve_count,
                           newtimes.resolve - timing[seg].resolve,
                           tics[seg] * 0.001);

                    total_time.bind_count += newtimes.bind_count - timing[seg].bind_count;
                    total_time.bind += newtimes.bind- timing[seg].bind;
                    total_time.price_count += newtimes.price_count - timing[seg].price_count;
                    total_time.price += newtimes.price - timing[seg].price;
                    total_time.resolve_count += newtimes.resolve_count - timing[seg].resolve_count;
                    total_time.resolve += newtimes.resolve - timing[seg].resolve;
                }
                printf("TT: %4d\t%f\t%4d\t%f\t%4d\t%f\n",
                       total_time.bind_count, total_time.bind,
                       total_time.price_count, total_time.price,
                       total_time.resolve_count, total_time.resolve);
            }

            fs_query_free(tq);

            if (result_format && !strcmp(result_format, "sparql")) {
                printf("<!-- EOR -->\n");
            } else {
                printf("#EOR\n");
            }
            fflush(stdout);
        }
    } while (newl && strcmp(newl, "#END\n"));

    raptor_free_uri(bu);
    fsp_close_link(link);
    raptor_free_world(rw);
    fs_query_cache_flush(qs, verbosity);
    fs_query_fini(qs);

    exit(overflow ? 1 : 0);
}
xmlChar *get_uri(fsp_link *link, fs_rid rid) { if (cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); return (xmlChar *) resource.lex; } xmlChar *get_attr(fsp_link *link, fs_rid rid) { if (attr_cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) attr_cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); memcpy(&attr_cache[rid & ATTR_CACHE_MASK], &resource, sizeof(fs_resource)); return (xmlChar *) resource.lex; } xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr) { if (cache[rid & CACHE_MASK].rid == rid) { *attr = cache[rid & CACHE_MASK].attr; return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); *attr = resource.attr; return (xmlChar *) resource.lex; } void resolve_triples(fsp_link *link, fs_rid_vector **rids) { int quads = rids[0]->length; fs_rid_vector *todo[segments]; fs_segment segment; for (segment = 0; segment < segments; ++segment) { todo[segment] = fs_rid_vector_new(0); } for (int c = 0; c < 3; ++c) { for (int k = 0; k < quads; ++k) { const fs_rid rid = rids[c]->data[k]; if (FS_IS_BNODE(rid) || cache[rid & CACHE_MASK].rid == rid) continue; fs_rid_vector_append(todo[FS_RID_SEGMENT(rid, segments)], rid); cache[rid & CACHE_MASK].rid = rid; /* well, it will be soon */ } } int length[segments]; fs_resource *resources[segments]; for (segment = 0; segment < segments; ++segment) { length[segment] = todo[segment]->length; resources[segment] = calloc(length[segment], sizeof(fs_resource)); } fsp_resolve_all(link, todo, resources); for (segment = 0; segment < segments; ++segment) 
{ fs_resource *res = resources[segment]; for (int k = 0; k < length[segment]; ++k) { free(cache[res[k].rid & CACHE_MASK].lex); memcpy(&cache[res[k].rid & CACHE_MASK], &res[k], sizeof(fs_resource)); } fs_rid_vector_free(todo[segment]); free(resources[segment]); } } void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml) { fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fs_rid_vector one = { .length = 1, .size = 1, .data = &model }; fs_rid_vector **results; double then; /* for time keeping */ then = fs_time(); fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT); time_bind_first += (fs_time() - then); while (results != NULL) { long length = results[0]->length; if (length == 0) break; then = fs_time(); resolve_triples(link, results); time_resolving += (fs_time() - then); then = fs_time(); for (int k = 0; k < length; ++k) { xmlTextWriterStartElement(xml, (xmlChar *) "triple"); for (int r = 0; r < 3; ++r) { fs_rid rid = results[r]->data[k]; if (FS_IS_BNODE(rid)) { unsigned long long node = FS_BNODE_NUM(rid); xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node); } else if (FS_IS_URI(rid)) { xmlChar *uri = get_uri(link, rid); xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri); } else if (FS_IS_LITERAL(rid)) { fs_rid attr; xmlChar *lex = get_literal(link, rid, &attr); if (attr == fs_c.empty) { xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex); } else if (FS_IS_URI(attr)) { xmlChar *type = get_uri(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral"); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type); xmlTextWriterEndElement(xml); } else if (FS_IS_LITERAL(attr)) { xmlChar *lang = get_attr(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral"); xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterEndElement(xml); } } } 
xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); } time_write_out += (fs_time() - then); fs_rid_vector_free(results[0]); fs_rid_vector_free(results[1]); fs_rid_vector_free(results[2]); free(results); then = fs_time(); fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT); time_bind_next += (fs_time() - then); } fsp_bind_done_all(link); } void dump_trix(fsp_link *link, xmlTextWriterPtr xml) { fs_rid_vector **models; fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models); fs_rid_vector_sort(models[0]); fs_rid_vector_uniq(models[0], 1); long length = models[0]->length; for (int k = 0; k < length; ++k) { fs_rid model = models[0]->data[k]; xmlChar *model_uri = get_uri(link, model); xmlTextWriterStartElement(xml, (xmlChar *) "graph"); if (FS_IS_URI(model)) { xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri); } else { fs_error(LOG_WARNING, "model %lld is not a URI", model); } dump_model(link, model, xml); xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out); } } void dump_file(fsp_link *link, char *filename) { xmlTextWriterPtr xml = xmlNewTextWriterFilename(filename, TRUE); if (!xml) { fs_error(LOG_ERR, "Couldn't write output file, giving up"); exit(4); } xmlTextWriterStartDocument(xml, NULL, NULL, NULL); xmlTextWriterStartElement(xml, (xmlChar *) "TriX"); dump_trix(link, xml); xmlTextWriterEndDocument(xml); /* also closes TriX */ xmlFreeTextWriter(xml); } int main(int argc, char *argv[]) { char *password = fsp_argv_password(&argc, argv); if (argc != 3) { fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]); exit(1); } fsp_link *link = fsp_open_link(argv[1], password, FS_OPEN_HINT_RO); if (!link) { 
fs_error (LOG_ERR, "couldn't connect to “%s”", argv[1]); exit(2); } fs_hash_init(fsp_hash_type(link)); segments = fsp_link_segments(link); dump_file(link, argv[2]); fsp_close_link(link); }