/* Resolve one DELETE template triple against solution row `row` and queue the
 * resulting quad for deletion.
 *
 * vec[0..3] collect the model, subject, predicate and object RIDs. Once more
 * than 999 quads are queued they are sent with fsp_delete_quads_all() and the
 * four vectors are emptied again.
 *
 * Returns 0 when the quad was queued, 1 when it was skipped because an explicit
 * graph term, the subject, the predicate or the object hashed to FS_RID_NULL. */
static int delete_rasqal_triple(struct update_context *ct, fs_rid_vector *vec[], rasqal_triple *triple, int row)
{
    fs_rid quad[4];

    if (triple->origin) {
        quad[0] = fs_hash_rasqal_literal(ct, triple->origin, row);
        if (quad[0] == FS_RID_NULL) {
            return 1;
        }
    } else if (ct->op->graph_uri) {
        quad[0] = fs_hash_uri((char *)raptor_uri_as_string(ct->op->graph_uri));
    } else {
        /* m can be wildcard in the absence of GRAPH, WITH etc. */
        quad[0] = FS_RID_NULL;
    }

    /* subject, predicate, object: hashed in that order, bail out on the
     * first one that is not bound */
    rasqal_literal *terms[3] = { triple->subject, triple->predicate, triple->object };
    for (int t = 0; t < 3; t++) {
        quad[t + 1] = fs_hash_rasqal_literal(ct, terms[t], row);
        if (quad[t + 1] == FS_RID_NULL) {
            return 1;
        }
    }

    /* as long as s, p, and o are bound, we can add this quad */
    for (int slot = 0; slot < 4; slot++) {
        fs_rid_vector_append(vec[slot], quad[slot]);
    }

    if (fs_rid_vector_length(vec[0]) > 999) {
        fsp_delete_quads_all(ct->link, vec);
        for (int slot = 0; slot < 4; slot++) {
            fs_rid_vector_truncate(vec[slot], 0);
        }
    }

    return 0;
}
/* ..._start and ..._finish share an int * count parameter
 * the same variable should be passed by reference both times
 *
 * Prepares the file-global parse_data for a streamed import into model_uri:
 * allocates the per-segment lexical buffers, creates the temporary quad file,
 * records the model URI as a resource and starts a raptor parser chosen from
 * `mimetype`.
 *
 * Returns 0 on success, 1 if the temporary file or the parser could not be
 * created. */
int fs_import_stream_start(fsp_link *link, const char *model_uri, const char *mimetype, int has_o_index, int *count)
{
    if (inited == 0) {
        memset(&parse_data, 0, sizeof(parse_data));
        inited = 1;
    }

    parse_data.link = link;
    parse_data.segments = fsp_link_segments(link);
    parse_data.ext_count = count;

    for (int i=0; i<parse_data.segments; i++) {
        for (int j=0; j<RES_BUF_SIZE; j++) {
            lex_tmp[i][j] = malloc(RES_BUF_SIZE);
        }
    }

    memset(nodecache, 0, sizeof(nodecache));

    parse_data.quad_fn = g_strdup(FS_TMP_PATH "/importXXXXXX");
    parse_data.quad_fd = mkstemp(parse_data.quad_fn);
    if (parse_data.quad_fd < 0) {
        fs_error(LOG_ERR, "Cannot create tmp file “%s”", parse_data.quad_fn);
        return 1;
    }

    parse_data.muri = raptor_new_uri((unsigned char *) model_uri);

    parse_data.model = g_strdup(model_uri);
    parse_data.model_hash = fs_hash_uri(model_uri);
    parse_data.count_trip = 0;
    parse_data.count_err = 0;
    parse_data.last_count = 0;
    parse_data.has_o_index = has_o_index;

    /* store the model uri */
    buffer_res(link, parse_data.segments, parse_data.model_hash, parse_data.model, FS_RID_NULL, parse_data.dryrun);

    parse_data.parser = raptor_new_parser_for_content(NULL, mimetype, NULL, 0, (unsigned char *) parse_data.model);
    if (!parse_data.parser) {
        fs_error(LOG_ERR, "failed to create RDF parser");
        return 1;
    }

    /* use us as a vector for an indirect attack? no thanks
     * NO_NET is a "deny" switch, so it has to be turned ON (1); the previous
     * value of 0 left network fetches enabled */
    raptor_set_feature(parse_data.parser, RAPTOR_FEATURE_NO_NET, 1);

    raptor_set_fatal_error_handler(parse_data.parser, link, fatal_rdf_parser_error);
    raptor_set_error_handler(parse_data.parser, link, rdf_parser_error);

    raptor_set_statement_handler(parse_data.parser, &parse_data, store_stmt);
    raptor_set_graph_handler(parse_data.parser, &parse_data, graph_handler);

    raptor_start_parse(parse_data.parser, parse_data.muri);

    fs_hash_freshen(); /* blank nodes are unique per file */

    return 0;
}
/* Build an fs_value describing the URI `s`.
 *
 * Starts from fs_value_blank(), stores the URI hash in .rid, points .lex at
 * the caller's string (the pointer is stored, not copied), sets .attr to
 * FS_RID_NULL and marks the RID as valid. */
fs_value fs_value_uri(const char *s)
{
    fs_value result = fs_value_blank();

    result.lex = (char *)s;
    result.attr = FS_RID_NULL;
    result.rid = fs_hash_uri(s);
    result.valid = fs_valid_bit(FS_V_RID);

    return result;
}
/* Hash the URI held in ctxt->resource, register it through insert_resource()
 * with an fs_c.empty attribute and return its RID.
 *
 * A NULL or empty URI is logged and 0 is returned without inserting. */
static fs_rid insert_uri(xmlctxt *ctxt)
{
    char *lex = ctxt->resource;

    if (lex == NULL || *lex == '\0') {
        fs_error(LOG_ERR, "NULL URI inserted");
        return 0;
    }

    const fs_rid rid = fs_hash_uri(lex);
    insert_resource(ctxt, rid, fs_c.empty, lex);

    return rid;
}
/* Raptor graph callback: switch the current model of the global parse_data.
 *
 * A NULL `graph` selects the import's own model URI (parse_data.muri),
 * otherwise the named graph is used. The previous model string is released,
 * the new one is duplicated and hashed, and it is recorded with buffer_res().
 * `user_data` is not used. */
void graph_handler(void *user_data, raptor_uri *graph)
{
    const char *name;

    g_free(parse_data.model);

    if (graph != NULL) {
        name = (char *) raptor_uri_as_string(graph);
    } else {
        name = (char *) raptor_uri_as_string(parse_data.muri);
    }
    parse_data.model = g_strdup(name);
    parse_data.model_hash = fs_hash_uri(parse_data.model);

    buffer_res(parse_data.link, parse_data.segments, parse_data.model_hash,
               parse_data.model, FS_RID_NULL, parse_data.dryrun);
}
/* Build an fs_value for the lexical form `s` with datatype URI `d`.
 *
 * .attr is the hash of `d`, or fs_c.empty when `d` is NULL or empty.
 * .lex points at the caller's string (stored, not copied). All other fields
 * keep whatever fs_value_blank() put there. */
fs_value fs_value_plain_with_dt(const char *s, const char *d)
{
    fs_value result = fs_value_blank();
    const int has_datatype = (d != NULL && *d != '\0');

    result.attr = has_datatype ? fs_hash_uri(d) : fs_c.empty;
    result.lex = (char *)s;

    return result;
}
/* Command-line helper: print the RID (16 upper-case hex digits) of a single
 * URI or literal.
 *
 * Accepted argument shapes, tried in this order:
 *   "text"@lang     literal with language tag
 *   "text"^^type    literal with datatype
 *   "text"          plain literal
 *   <uri>           URI
 * Each captured component is limited to 127 characters. Exits with status 1
 * on a usage error or when the argument matches none of the shapes. */
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "%s revision %s\n", argv[0], FS_BACKEND_VER);
        fprintf(stderr, "Usage: %s <uri> | \"literal\"\n", argv[0]);
        exit(1);
    }

    char *input = argv[1];
    char lex[128], lang[128], type[128], uri[128];
    fs_rid rid;

    /* the hash flavour is chosen at build time */
#ifdef FS_MD5
    fs_hash_init(FS_HASH_MD5);
#endif
#ifdef FS_CRC64
    fs_hash_init(FS_HASH_CRC64);
#endif
#ifdef FS_UMAC
    fs_hash_init(FS_HASH_UMAC);
#endif

    if (sscanf(input, "\"%127[^\"]\"@%127s", lex, lang) == 2) {
        const fs_rid lang_rid = fs_hash_literal(lang, 0);
        rid = fs_hash_literal(lex, lang_rid);
    } else if (sscanf(input, "\"%127[^\"]\"^^%127s", lex, type) == 2) {
        const fs_rid type_rid = fs_hash_uri(type);
        rid = fs_hash_literal(lex, type_rid);
    } else if (sscanf(input, "\"%127[^\"]\"", lex) == 1) {
        rid = fs_hash_literal(lex, 0);
    } else if (sscanf(input, "<%127[^>]>", uri) == 1) {
        rid = fs_hash_uri(uri);
    } else {
        fprintf(stderr, "Couldn't recognise a URI or literal in string '%s'\n", input);
        exit(1);
    }

    printf("%016llX\n", rid);

    return 0;
}
/* Insert the quad described by an INSERT template triple for solution row
 * `row`.
 *
 * The graph comes from the triple's GRAPH term, else from the operation's
 * graph URI, else it is the default graph. The quad is rejected (return 1)
 * when the graph RID fails FS_IS_URI, the subject RID passes FS_IS_LITERAL,
 * or the predicate RID fails FS_IS_URI. Otherwise the lexical resources for
 * graph, subject, predicate and object are imported (when they have a lexical
 * form) followed by the quad itself, and 0 is returned.
 *
 * Writing to the system config graph triggers fsp_reload_acl_system(). */
static int insert_rasqal_triple(struct update_context *uc, rasqal_triple *triple, int row)
{
    fs_rid quad_buf[1][4];
    fs_rid *quad = quad_buf[0];
    fs_resource res;

    if (triple->origin) {
        fs_resource_from_rasqal_literal(uc, triple->origin, &res, row);
        quad[0] = fs_hash_rasqal_literal(uc, triple->origin, row);
    } else if (uc->op->graph_uri) {
        char *graph = (char *)raptor_uri_as_string(uc->op->graph_uri);
        res.lex = graph;
        res.attr = FS_RID_NULL;
        quad[0] = fs_hash_uri(graph);
    } else {
        quad[0] = fs_c.default_graph;
        res.lex = FS_DEFAULT_GRAPH;
        res.attr = FS_RID_NULL;
    }

    if (quad[0] == fs_c.system_config) {
        fsp_reload_acl_system(uc->link);
    }

    if (!FS_IS_URI(quad[0])) {
        return 1;
    }
    quad[1] = fs_hash_rasqal_literal(uc, triple->subject, row);
    if (FS_IS_LITERAL(quad[1])) {
        return 1;
    }
    quad[2] = fs_hash_rasqal_literal(uc, triple->predicate, row);
    if (!FS_IS_URI(quad[2])) {
        return 1;
    }
    quad[3] = fs_hash_rasqal_literal(uc, triple->object, row);

    /* graph resource: res.lex / res.attr were filled in above */
    res.rid = quad[0];
    if (res.lex) {
        fsp_res_import(uc->link, FS_RID_SEGMENT(quad[0], uc->segments), 1, &res);
    }

    /* subject, predicate, object resources, in that order */
    rasqal_literal *terms[3] = { triple->subject, triple->predicate, triple->object };
    for (int t = 0; t < 3; t++) {
        const fs_rid rid = quad[t + 1];

        res.rid = rid;
        fs_resource_from_rasqal_literal(uc, terms[t], &res, 0);
        if (res.lex) {
            fsp_res_import(uc->link, FS_RID_SEGMENT(rid, uc->segments), 1, &res);
        }
    }

    fsp_quad_import(uc->link, FS_RID_SEGMENT(quad[1], uc->segments), FS_BIND_BY_SUBJECT, 1, quad_buf);

    return 0;
}
/* Insert the typed literal held in ctxt->resource, with datatype URI
 * ctxt->attr, and return the literal's RID.
 *
 * When ctxt->attr is set the datatype URI is inserted as a resource first.
 * When it is NULL an error is logged and the literal is still inserted, with
 * fs_c.empty as its datatype. */
static fs_rid insert_typed(xmlctxt *ctxt)
{
    char *lex = ctxt->resource;
    fs_rid datatype = fs_c.empty;

    if (!ctxt->attr) {
        fs_error(LOG_ERR, "NULL type URI inserted");
    } else {
        datatype = fs_hash_uri(ctxt->attr);
        insert_resource(ctxt, datatype, fs_c.empty, ctxt->attr);
    }

    const fs_rid rid = fs_hash_literal(lex, datatype);
    insert_resource(ctxt, rid, datatype, lex);

    return rid;
}
/* Fill res->lex and res->attr from the rasqal literal `l`. res->rid is NOT
 * written here; callers set it themselves.
 *
 *   NULL literal  lex = "(null)", attr = FS_RID_NULL
 *   variable      lex = NULL, attr = FS_RID_GONE
 *   URI           lex = URI string, attr = FS_RID_NULL
 *   anything else lex = l->string, attr = hash of the datatype URI or of the
 *                 language tag (0 if neither); the attribute resource itself
 *                 is imported with fsp_res_import() when there is one
 *
 * Language tags are upper-cased before hashing and storing so that the
 * attribute agrees with the RID computed by fs_hash_rasqal_literal(), which
 * normalises the tag the same way.
 *
 * res->lex points into the rasqal literal and is not copied. `row` is
 * currently unused. */
void fs_resource_from_rasqal_literal(struct update_context *uctxt, rasqal_literal *l, fs_resource *res, int row)
{
    if (!l) {
        res->lex = "(null)";
        res->attr = FS_RID_NULL;

        return;
    }

    rasqal_literal_type type = l->type;
    if (type == RASQAL_LITERAL_VARIABLE) {
        /* right now you can't introduce new literals in INSERT, so it doesn't
         * matter */
        res->lex = NULL;
        res->attr = FS_RID_GONE;
    } else if (type == RASQAL_LITERAL_URI) {
        res->lex = (char *)raptor_uri_as_string(l->value.uri);
        res->attr = FS_RID_NULL;
    } else {
        res->lex = (char *)l->string;
        res->attr = 0;

        fs_resource ares;
        char *lang = NULL; /* upper-cased copy of the language tag, owned here */

        ares.lex = NULL;
        if (l->datatype) {
            res->attr = fs_hash_uri((char *)raptor_uri_as_string(l->datatype));
            ares.rid = res->attr;
            ares.lex = (char *)raptor_uri_as_string(l->datatype);
            ares.attr = FS_RID_NULL;
        } else if (l->language) {
            /* lang tags are normalised to upper case internally */
            lang = g_ascii_strup((char *)l->language, -1);
            res->attr = fs_hash_literal(lang, 0);
            ares.rid = res->attr;
            ares.lex = lang;
            ares.attr = 0;
        }

        /* insert attribute resource if there is one */
        if (ares.lex) {
            fsp_res_import(uctxt->link, FS_RID_SEGMENT(ares.rid, uctxt->segments), 1, &ares);
        }

        /* NOTE(review): assumes fsp_res_import() does not keep ares.lex after
         * it returns (ares itself is a local that dies here) - confirm */
        g_free(lang);
    }
}
int fs_clear(struct update_context *uc, char *graphuri) { fs_rid_vector *mvec = fs_rid_vector_new(0); fs_rid mrid; if (graphuri) { mrid = fs_hash_uri(graphuri); } else { graphuri = FS_DEFAULT_GRAPH; mrid = fs_c.default_graph; } fs_rid_vector_append(mvec, mrid); int errors = 0; if (fsp_delete_model_all(uc->link, mvec)) { errors++; add_message(uc, g_strdup_printf("Error while trying to delete %s", graphuri), 1); } else { add_message(uc, g_strdup_printf("Deleted <%s>", graphuri), 1); } fs_rid_vector_free(mvec); return errors; }
int fs_copy(struct update_context *uc, char *from, char *to) { fs_rid_vector *mvec = fs_rid_vector_new(0); fs_rid_vector *empty = fs_rid_vector_new(0); fs_rid fromrid, torid; if (from) { fromrid = fs_hash_uri(from); } else { from = FS_DEFAULT_GRAPH; fromrid = fs_c.default_graph; } if (to) { torid = fs_hash_uri(to); } else { to = FS_DEFAULT_GRAPH; torid = fs_c.default_graph; } if (fromrid == torid) { /*don't need to do anything */ fs_rid_vector_free(mvec); fs_rid_vector_free(empty); add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1); add_message(uc, "0 triples added, 0 removed", 0); return 0; } fs_rid_vector_append(mvec, fromrid); /* search for all the triples in from */ fs_rid_vector **results; fs_rid_vector *slot[4] = { mvec, empty, empty, empty }; /* see if there's any data in <from> */ fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT, slot, &results, -1, 1); if (!results || results[0]->length == 0) { if (results) { fs_rid_vector_free(results[0]); free(results); } fs_rid_vector_free(mvec); fs_rid_vector_free(empty); add_message(uc, g_strdup_printf("<%s> is empty, not copying", from), 1); return 1; } fs_rid_vector_free(results[0]); free(results); /* get the contents of <from> */ fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT, slot, &results, -1, -1); /* map old bnodes to new ones */ map_bnodes(uc, results[0]); map_bnodes(uc, results[1]); map_bnodes(uc, results[2]); /* delete <to> */ mvec->data[0] = torid; if (fsp_delete_model_all(uc->link, mvec)) { fs_rid_vector_free(mvec); fs_rid_vector_free(empty); add_message(uc, g_strdup_printf("Error while trying to delete %s", to), 1); return 1; } fs_rid_vector_free(mvec); fs_rid_vector_free(empty); /* insert <to> */ fs_resource tores; tores.lex = to; tores.attr= FS_RID_NULL; tores.rid = torid; fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores); insert_triples(uc, torid, results[0], results[1], 
results[2]); add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1); add_message(uc, g_strdup_printf("%d triples added, ?? removed", results[0]->length), 1); for (int i=0; i<3; i++) { fs_rid_vector_free(results[i]); } free(results); return 0; }
int fs_add(struct update_context *uc, char *from, char *to) { fs_rid_vector *mvec = fs_rid_vector_new(0); fs_rid_vector *empty = fs_rid_vector_new(0); fs_rid fromrid, torid; if (from) { fromrid = fs_hash_uri(from); } else { from = FS_DEFAULT_GRAPH; fromrid = fs_c.default_graph; } if (to) { torid = fs_hash_uri(to); } else { to = FS_DEFAULT_GRAPH; torid = fs_c.default_graph; } if (fromrid == torid) { /*don't need to do anything */ add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1); add_message(uc, "0 triples added, 0 removed", 0); return 0; } fs_rid_vector_append(mvec, fromrid); int errors = 0; /* search for all the triples in from */ fs_rid_vector **results; fs_rid_vector *slot[4] = { mvec, empty, empty, empty }; fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT, slot, &results, -1, -1); fs_rid_vector_free(mvec); fs_rid_vector_free(empty); if (!results || results[0]->length == 0) { /* there's nothing to add */ if (results) { for (int i=0; i<3; i++) { fs_rid_vector_free(results[i]); } free(results); } add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1); add_message(uc, "0 triples added, 0 removed", 0); return 0; } map_bnodes(uc, results[0]); map_bnodes(uc, results[1]); map_bnodes(uc, results[2]); fs_resource tores; tores.lex = to; tores.attr= FS_RID_NULL; tores.rid = torid; fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores); insert_triples(uc, torid, results[0], results[1], results[2]); add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1); add_message(uc, g_strdup_printf("%d triples added, 0 removed", results[0]->length), 1); for (int i=0; i<3; i++) { fs_rid_vector_free(results[i]); } free(results); return errors; }
/* Compute the RID of a rasqal literal for solution row `row`.
 *
 *   NULL          FS_RID_NULL
 *   variable      the value bound in uc->q->bb[0] for that row; FS_RID_NULL
 *                 (plus an error log) when there is no query to bind from
 *   URI           hash of the URI string
 *   string-like   literal hash, with the datatype URI hash or the hash of the
 *                 upper-cased language tag as attribute (0 if neither)
 *   blank node    id from fs_bnode_id()
 *   anything else FS_RID_NULL, with an error log */
fs_rid fs_hash_rasqal_literal(struct update_context *uc, rasqal_literal *l, int row)
{
    if (l == NULL) {
        return FS_RID_NULL;
    }

    if (l->type == RASQAL_LITERAL_VARIABLE) {
        if (!uc->q) {
            fs_error(LOG_ERR, "no variables bound");
            return FS_RID_NULL;
        }
        return fs_binding_get_val(uc->q->bb[0], l->value.variable, row, NULL);
    }

    const rasqal_literal_type term_type = rasqal_literal_get_rdf_term_type(l);

    if (term_type == RASQAL_LITERAL_URI) {
        return fs_hash_uri((char *)raptor_uri_as_string(l->value.uri));
    }

    if (term_type == RASQAL_LITERAL_UNKNOWN ||
        term_type == RASQAL_LITERAL_STRING ||
        term_type == RASQAL_LITERAL_XSD_STRING) {
        fs_rid attr = 0;

        if (l->datatype) {
            attr = fs_hash_uri((char *)raptor_uri_as_string(l->datatype));
        } else if (l->language) {
            /* lang tags are normalised to upper case internally */
            char *upper = g_ascii_strup((char *)l->language, -1);
            attr = fs_hash_literal(upper, 0);
            g_free(upper);
        }

        return fs_hash_literal((char *)rasqal_literal_as_string(l), attr);
    }

    if (term_type == RASQAL_LITERAL_BLANK) {
        raptor_term_blank_value label;

        label.string = (unsigned char *)rasqal_literal_as_string(l);
        label.string_len = strlen((char *)label.string);

        return fs_bnode_id(uc->link, label);
    }

    /* every other rasqal literal type cannot be hashed here */
    fs_error(LOG_ERR, "bad rasqal literal (type %d)", term_type);

    return FS_RID_NULL;
}
static void rdf_parser_statement_handler(void* user_data, const raptor_statement* st) { raptor_term *g, *s, *p, *o; g = st->graph; s = st->subject; p = st->predicate; o = st->object; rdf_parser_internal *parser_obj = (rdf_parser_internal *) user_data; if (parser_obj->counter == 0) parser_obj->partial_parse_time = g_timer_new(); parser_obj->counter++; /* init index logic */ unsigned char *gc = NULL; fs_rid g_rid; if (parser_obj->trig) { gc = raptor_uri_as_string(g->value.uri); g_rid = fs_hash_uri((const char *)gc); } else { g_rid = parser_obj->g_rid ; gc = parser_obj->model; } unsigned char *sc = NULL; if (s->type == RAPTOR_TERM_TYPE_URI) sc = raptor_uri_as_string(s->value.uri); else { sc = (unsigned char *) g_strdup_printf("bnode:b%s%s",s->value.blank.string+5,parser_obj->bnode_ts); } unsigned char *pc = raptor_uri_as_string(p->value.uri); unsigned char *oc = NULL; unsigned char *o_lang = NULL; unsigned char *o_datatype = NULL; fs_rid s_rid = fs_hash_uri((const char *) sc); fs_rid p_rid = fs_hash_uri((const char *) pc); fs_rid o_rid = 0x0; if (o->type == RAPTOR_TERM_TYPE_URI) { oc = raptor_uri_as_string(o->value.uri); o_rid = fs_hash_uri((const char *) oc); } else if (o->type == RAPTOR_TERM_TYPE_LITERAL) { oc = o->value.literal.string; if (o->value.literal.datatype) { o_datatype = raptor_uri_as_string(o->value.literal.datatype); o_rid = fs_hash_literal((const char *) oc,fs_hash_uri((const char *) o_datatype)); } else if (o->value.literal.language != NULL) { o_lang = o->value.literal.language; o_rid = fs_hash_literal((const char *) oc, fs_hash_uri((const char *) o_lang)); } else { o_rid = fs_hash_literal((const char *) oc, FS_RID_NULL); } oc = raptor_term_to_string(o); } else if (o->type == RAPTOR_TERM_TYPE_BLANK) { oc = (unsigned char *) g_strdup_printf("bnode:b%s%s",o->value.blank.string+5,parser_obj->bnode_ts); o_rid = fs_hash_uri((const char *) oc); } int seg_id = s_rid % SEGMENTS; fs_rid *quad = rdf_parser_new_quad(g_rid,s_rid,p_rid,o_rid); 
g_ptr_array_add(parser_obj->quads[seg_id],quad); /* saves hashes into disk hash TODO looks for optimistions */ rdf_kb *kb = parser_obj->kb; fs_rid hashes[4] = {g_rid,s_rid,p_rid,o_rid}; //log_debug("%llx %llx %llx %llx", hashes[0], hashes[1], hashes[2], hashes[3]); //char tmp_rid[16+1]; char tmp_rid[17]; unsigned char *strings[4] = {gc,sc,pc,oc}; int assigned_hash=0; for(int i=0;i<4;i++) { memset(tmp_rid,0,16); sprintf(tmp_rid,"%llx",hashes[i]); assigned_hash = hashes[i] % HASHES_NUM; //if (i > 0 && strlen(tmp_rid) < 15 ) //printf("ERRRRO NUL %s %s\n", tmp_rid, strings[i]); kcdbset(kb->hash_stores[assigned_hash],(const char *) tmp_rid, 16 ,(const char *) strings[i], strlen((const char *) strings[i])); } /* end of saving hashes into disk */ if (!(parser_obj->counter % STAT_BATCH)) { double kt = parser_obj->counter/1e3; log_debug("parsing progress %.0lf kT %.2lf kT/s %.2lf kT/s",kt,kt/g_timer_elapsed(parser_obj->global_parse_time,NULL), (STAT_BATCH/1e3)/g_timer_elapsed(parser_obj->partial_parse_time,NULL)); g_timer_start(parser_obj->partial_parse_time); } }
static void store_stmt(void *user_data, const raptor_statement * statement) { fs_parse_stuff *data = (fs_parse_stuff *) user_data; char *subj = (char *) raptor_uri_as_string((raptor_uri *) statement->subject); char *pred; char *obj; fs_rid m, s, p, o; char tmpp[512]; m = data->model_hash; if (statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) { s = fs_bnode_id(data->link, statement->subject); subj = (char *) statement->subject; } else { s = fs_hash_uri(subj); } if (statement->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) { sprintf(tmpp, MEMBER_PREFIX "%d", *((int *)statement->predicate)); pred = tmpp; } else { pred = (char *) raptor_uri_as_string((raptor_uri *) statement->predicate); } p = fs_hash_uri(pred); fs_rid attr = fs_c.empty; if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL || statement->object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) { obj = (char *) statement->object; if (statement->object_literal_language) { char *langtag = (char *)statement->object_literal_language; for (char *pos = langtag; *pos; pos++) { if (islower(*pos)) { *pos = toupper(*pos); } } attr = fs_hash_literal(langtag, 0); buffer_res(data->link, data->segments, attr, langtag, fs_c.empty, data->dryrun); } else if (raptor_uri_as_string(statement->object_literal_datatype)) { char *dt = (char *)raptor_uri_as_string(statement->object_literal_datatype); attr = fs_hash_uri(dt); buffer_res(data->link, data->segments, attr, dt, FS_RID_NULL, data->dryrun); } o = fs_hash_literal(obj, attr); } else if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) { o = fs_bnode_id(data->link, statement->object); obj = (char *) statement->object; } else { obj = (char *) raptor_uri_as_string((raptor_uri *) statement-> object); attr = FS_RID_NULL; o = fs_hash_uri(obj); } buffer_res(data->link, data->segments, s, subj, FS_RID_NULL, data->dryrun); buffer_res(data->link, data->segments, p, pred, FS_RID_NULL, data->dryrun); buffer_res(data->link, data->segments, o, obj, attr, 
data->dryrun); fs_rid tbuf[4] = { m, s, p, o }; retry_write: if (write(data->quad_fd, tbuf, sizeof(tbuf)) == -1) { fs_error(LOG_ERR, "failed to buffer quad to fd %d (0x%x): %s", data->quad_fd, data->quad_fd, strerror(errno)); if (errno == EAGAIN || errno == EINTR || errno == ENOSPC) { sleep(5); goto retry_write; } } if (data->verbosity > 2) { fprintf(stderr, "%016llx %016llx %016llx %016llx\n", m, s, p, o); } data->count_trip++; total_triples_parsed++; if (data->verbosity && total_triples_parsed % 10000 == 0) { printf("Pass 1, processed %d triples\r", total_triples_parsed); fflush(stdout); } if (total_triples_parsed == FS_CHUNK_SIZE) { if (data->verbosity) printf("Pass 1, processed %d triples (%d)\n", FS_CHUNK_SIZE, data->count_trip); *(data->ext_count) += process_quads(data); data->last_count = data->count_trip; total_triples_parsed = 0; gettimeofday(&then_last, 0); } }
/* Parse the RDF document at resource_uri and buffer its quads for import into
 * the graph model_uri.
 *
 * link          connection to the store
 * format        raptor parser name, or "auto" to guess from resource_uri
 *               (".n3"/".ttl" -> turtle, ".nt" -> ntriples, else rdfxml)
 * verbosity     > 0 prints progress to stdout
 * dryrun        FS_DRYRUN_* flags, passed through to buffer_res()
 * has_o_index   stored in parse_data for later stages
 * msg           unused here
 * count         receives the running total via store_stmt()
 *
 * The first call also performs one-time setup of the file-global parse_data
 * (lexical buffers, temporary quad file).
 * NOTE(review): if mkstemp() fails during that setup `inited` stays set, so a
 * later call will not retry it - confirm whether callers ever continue.
 *
 * Returns 0 on success, non-zero if the temporary file or the parser could not
 * be created or the document failed to parse. */
int fs_import(fsp_link *link, const char *model_uri, char *resource_uri,
              const char *format, int verbosity, int dryrun, int has_o_index,
              FILE *msg, int *count)
{
    raptor_parser *rdf_parser = NULL;
    raptor_uri *ruri = NULL;
    int ret = 0;

    const int segments = fsp_link_segments(link);

    parse_data.ext_count = count;
    if (!inited) {
        inited = 1;
        parse_data.link = link;
        parse_data.segments = fsp_link_segments(link);

        for (int i=0; i<parse_data.segments; i++) {
            for (int j=0; j<RES_BUF_SIZE; j++) {
                lex_tmp[i][j] = malloc(RES_BUF_SIZE);
            }
        }

        memset(nodecache, 0, sizeof(nodecache));

        parse_data.quad_fn = g_strdup(FS_TMP_PATH "/importXXXXXX");
        parse_data.quad_fd = mkstemp(parse_data.quad_fn);
        if (parse_data.quad_fd < 0) {
            fs_error(LOG_ERR, "Cannot create tmp file “%s”", parse_data.quad_fn);
            return 1;
        }
        gettimeofday(&then_last, 0);
    }

    parse_data.verbosity = verbosity;
    parse_data.model = g_strdup(model_uri);
    parse_data.model_hash = fs_hash_uri(model_uri);
    parse_data.count_trip = 0;
    parse_data.last_count = 0;
    parse_data.dryrun = dryrun;
    parse_data.has_o_index = has_o_index;

    /* store the model uri */
    buffer_res(link, segments, parse_data.model_hash, parse_data.model, FS_RID_NULL, dryrun);

    if (strcmp(format, "auto")) {
        rdf_parser = raptor_new_parser(format);
    } else if (strstr(resource_uri, ".n3") || strstr(resource_uri, ".ttl")) {
        rdf_parser = raptor_new_parser("turtle");
    } else if (strstr(resource_uri, ".nt")) {
        rdf_parser = raptor_new_parser("ntriples");
    } else {
        rdf_parser = raptor_new_parser("rdfxml");
    }

    if (!rdf_parser) {
        fs_error(LOG_ERR, "failed to create RDF parser");
        /* the model string is released on the normal path below; do the same
         * here so it does not leak */
        g_free(parse_data.model);
        parse_data.model = NULL;
        return 1;
    }

    raptor_set_statement_handler(rdf_parser, &parse_data, store_stmt);
    raptor_set_graph_handler(rdf_parser, &parse_data, graph_handler);

    ruri = raptor_new_uri((unsigned char *) resource_uri);
    parse_data.muri = raptor_new_uri((unsigned char *) model_uri);

    if (raptor_parse_uri(rdf_parser, ruri, parse_data.muri)) {
        fs_error(LOG_ERR, "failed to parse file “%s”", resource_uri);
        ret++;
    }
    if (verbosity) {
        printf("Pass 1, processed %d triples (%d)\n", total_triples_parsed, parse_data.count_trip);
    }

    raptor_free_parser(rdf_parser);
    raptor_free_uri(ruri);
    raptor_free_uri(parse_data.muri);
    g_free(parse_data.model);

    fs_hash_freshen(); /* blank nodes are unique per file */

    return ret;
}
int main(int argc, char *argv[]) { int verbosity = 0; int dryrun = 0; char *password = NULL; char *format = "auto"; FILE *msg = stderr; char *optstring = "am:M:vnf:"; int c, opt_index = 0, help = 0; int files = 0, adding = 0; char *kb_name = NULL; char *model[argc], *uri[argc]; char *model_default = NULL; password = fsp_argv_password(&argc, argv); static struct option long_options[] = { { "add", 0, 0, 'a' }, { "model", 1, 0, 'm' }, { "model-default", 1, 0, 'M' }, { "verbose", 0, 0, 'v' }, { "dryrun", 0, 0, 'n' }, { "no-resources", 0, 0, 'R' }, { "no-quads", 0, 0, 'Q' }, { "format", 1, 0, 'f' }, { "help", 0, 0, 'h' }, { "version", 0, 0, 'V' }, { 0, 0, 0, 0 } }; for (int i= 0; i < argc; ++i) { model[i] = NULL; } int help_return = 1; while ((c = getopt_long (argc, argv, optstring, long_options, &opt_index)) != -1) { if (c == 'm') { model[files++] = optarg; } else if (c == 'M') { model_default = optarg; } else if (c == 'v') { verbosity++; } else if (c == 'a') { adding = 1; } else if (c == 'n') { dryrun |= FS_DRYRUN_DELETE | FS_DRYRUN_RESOURCES | FS_DRYRUN_QUADS; } else if (c == 'R') { dryrun |= FS_DRYRUN_RESOURCES; } else if (c == 'Q') { dryrun |= FS_DRYRUN_QUADS; } else if (c == 'f') { format = optarg; } else if (c == 'h') { help = 1; help_return = 0; } else if (c == 'V') { printf("%s, built for 4store %s\n", argv[0], GIT_REV); exit(0); } else { help = 1; } } if (verbosity > 0) { if (dryrun & FS_DRYRUN_DELETE) { printf("warning: not deleting old model\n"); } if (dryrun & FS_DRYRUN_RESOURCES) { printf("warning: not importing resource nodes\n"); } if (dryrun & FS_DRYRUN_QUADS) { printf("warning: not importing quad graph\n"); } } files = 0; for (int k = optind; k < argc; ++k) { if (!kb_name) { kb_name = argv[k]; } else { if (strchr(argv[k], ':')) { uri[files] = g_strdup(argv[k]); } else { uri[files] = (char *)raptor_uri_filename_to_uri_string(argv[k]); } if (!model[files]) { if (!model_default) { model[files] = uri[files]; } else { model[files] = model_default; } } 
files++; } } raptor_world *rw = raptor_new_world(); if (help || !kb_name || files == 0) { fprintf(stdout, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stdout, "Usage: %s <kbname> <rdf file/URI> ...\n", argv[0]); fprintf(stdout, " -v --verbose increase verbosity (can repeat)\n"); fprintf(stdout, " -a --add add data to models instead of replacing\n"); fprintf(stdout, " -m --model specify a model URI for the next RDF file\n"); fprintf(stdout, " -M --model-default specify a model URI for all RDF files\n"); fprintf(stdout, " -f --format specify an RDF syntax for the import\n"); fprintf(stdout, "\n available formats are:\n"); for (unsigned int i=0; 1; i++) { const raptor_syntax_description *desc = raptor_world_get_parser_description(rw, i); if (!desc) { break; } fprintf(stdout, " %12s - %s\n", desc->names[0], desc->label); } exit(help_return); } fsp_syslog_enable(); fsplink = fsp_open_link(kb_name, password, FS_OPEN_HINT_RW); if (!fsplink) { fs_error (LOG_ERR, "couldn't connect to “%s”", kb_name); exit(2); } const char *features = fsp_link_features(fsplink); int has_o_index = !(strstr(features, "no-o-index")); /* tweak */ fs_hash_init(fsp_hash_type(fsplink)); const int segments = fsp_link_segments(fsplink); int total_triples = 0; fs_import_timing timing[segments]; for (int seg = 0; seg < segments; seg++) { fsp_get_import_times(fsplink, seg, &timing[seg]); } gettimeofday(&then, 0); if (fsp_start_import_all(fsplink)) { fs_error(LOG_ERR, "aborting import"); exit(3); } #if 0 printf("press enter\n"); char foo; read(0, &foo, 1); #endif fs_rid_vector *mvec = fs_rid_vector_new(0); for (int f= 0; f < files; ++f) { fs_rid muri = fs_hash_uri(model[f]); fs_rid_vector_append(mvec, muri); } if (!adding) { if (verbosity) { printf("removing old data\n"); fflush(stdout); } if (!(dryrun & FS_DRYRUN_DELETE)) { if (fsp_delete_model_all(fsplink, mvec)) { fs_error(LOG_ERR, "model delete failed"); return 1; } for (int i=0; i<mvec->length; i++) { if (mvec->data[i] == 
fs_c.system_config) { fs_import_reread_config(); } } } fsp_new_model_all(fsplink, mvec); } fs_rid_vector_free(mvec); gettimeofday(&then_last, 0); for (int f = 0; f < files; ++f) { if (verbosity) { printf("Reading <%s>\n", uri[f]); if (strcmp(uri[f], model[f])) { printf(" into <%s>\n", model[f]); } fflush(stdout); } fs_import(fsplink, model[f], uri[f], format, verbosity, dryrun, has_o_index, msg, &total_triples); if (verbosity) { fflush(stdout); } } double sthen = fs_time(); int ret = fs_import_commit(fsplink, verbosity, dryrun, has_o_index, msg, &total_triples); if (verbosity > 0) { printf("Updating index\n"); fflush(stdout); } fsp_stop_import_all(fsplink); if (verbosity > 0) { printf("Index update took %f seconds\n", fs_time()-sthen); } if (!ret) { gettimeofday(&now, 0); double diff = (now.tv_sec - then.tv_sec) + (now.tv_usec - then.tv_usec) * 0.000001; if (verbosity && total_triples > 0) { printf("Imported %d triples, average %d triples/s\n", total_triples, (int)((double)total_triples/diff)); fflush(stdout); } } if (verbosity > 1) { printf("seg add_q\tadd_r\t\tcommit_q\tcommit_r\tremove\t\trebuild\t\twrite\n"); long long *tics = fsp_profile_write(fsplink); for (int seg = 0; seg < segments; seg++) { fs_import_timing newtimes; fsp_get_import_times(fsplink, seg, &newtimes); printf("%2d: %f\t%f\t%f\t%f\t%f\t%f\t%f\n", seg, newtimes.add_s - timing[seg].add_s, newtimes.add_r - timing[seg].add_r, newtimes.commit_q - timing[seg].commit_q, newtimes.commit_r - timing[seg].commit_r, newtimes.remove - timing[seg].remove, newtimes.rebuild - timing[seg].rebuild, tics[seg] * 0.001); } } fsp_close_link(fsplink); raptor_free_world(rw); return 0; }
/* Initialise the hashing layer for the backend hash function `type` and fill
 * the fs_c table of frequently used RIDs.
 *
 * Only FS_HASH_UMAC is accepted: FS_HASH_MD5, FS_HASH_CRC64 and
 * FS_HASH_UNKNOWN log a critical error and terminate the process with exit
 * status 4.
 *
 * Also creates the blank node table `bnids` and registers fs_hash_fini() with
 * atexit(). */
void fs_hash_init(fsp_hash_enum type)
{
    if (type == FS_HASH_MD5 || type == FS_HASH_CRC64) {
        fs_error(LOG_CRIT, "Unsuported backend hash function, exiting");
        exit(4);
    }
    if (type == FS_HASH_UNKNOWN) {
        fs_error(LOG_CRIT, "Unknown backend hash function, exiting");
        exit(4);
    }

    bnids = g_hash_table_new_full(g_str_hash, g_str_equal, bnhash_destroy, NULL);
    atexit(fs_hash_fini);

    fs_c.empty = 0LL;

    /* XSD datatypes */
    fs_c.xsd_string    = fs_hash_uri(XSD_STRING);
    fs_c.xsd_integer   = fs_hash_uri(XSD_INTEGER);
    fs_c.xsd_float     = fs_hash_uri(XSD_FLOAT);
    fs_c.xsd_double    = fs_hash_uri(XSD_DOUBLE);
    fs_c.xsd_decimal   = fs_hash_uri(XSD_DECIMAL);
    fs_c.xsd_boolean   = fs_hash_uri(XSD_BOOLEAN);
    fs_c.xsd_datetime  = fs_hash_uri(XSD_DATETIME);

    /* XSD integer subtypes */
    fs_c.xsd_pinteger  = fs_hash_uri(XSD_NAMESPACE "positiveInteger");
    fs_c.xsd_ninteger  = fs_hash_uri(XSD_NAMESPACE "negativeInteger");
    fs_c.xsd_npinteger = fs_hash_uri(XSD_NAMESPACE "nonPositiveInteger");
    fs_c.xsd_nninteger = fs_hash_uri(XSD_NAMESPACE "nonNegativeInteger");
    fs_c.xsd_long      = fs_hash_uri(XSD_NAMESPACE "long");
    fs_c.xsd_int       = fs_hash_uri(XSD_NAMESPACE "int");
    fs_c.xsd_short     = fs_hash_uri(XSD_NAMESPACE "short");
    fs_c.xsd_byte      = fs_hash_uri(XSD_NAMESPACE "byte");
    fs_c.xsd_ulong     = fs_hash_uri(XSD_NAMESPACE "unsignedLong");
    fs_c.xsd_uint      = fs_hash_uri(XSD_NAMESPACE "unsignedInt");
    fs_c.xsd_ushort    = fs_hash_uri(XSD_NAMESPACE "unsignedShort");
    fs_c.xsd_ubyte     = fs_hash_uri(XSD_NAMESPACE "unsignedByte");

    /* language tags */
    fs_c.lang_en = fs_hash_literal("en", 0);
    fs_c.lang_fr = fs_hash_literal("fr", 0);
    fs_c.lang_de = fs_hash_literal("de", 0);
    fs_c.lang_es = fs_hash_literal("es", 0);

    /* well-known URIs */
    fs_c.rdf_type       = fs_hash_uri(RDF_NAMESPACE "type");
    fs_c.default_graph  = fs_hash_uri(FS_DEFAULT_GRAPH);
    fs_c.system_config  = fs_hash_uri(FS_SYSTEM_CONFIG);
    fs_c.rdfs_label     = fs_hash_uri(RDFS_LABEL);
    fs_c.fs_text_index  = fs_hash_uri(FS_TEXT_INDEX);
    fs_c.fs_token       = fs_hash_uri(FS_TEXT_TOKEN);
    fs_c.fs_dmetaphone  = fs_hash_uri(FS_TEXT_DMETAPHONE);
    fs_c.fs_stem        = fs_hash_uri(FS_TEXT_STEM);
}