Пример #1
0
static int delete_rasqal_triple(struct update_context *ct, fs_rid_vector *vec[], rasqal_triple *triple, int row)
{
    fs_rid m, s, p, o;

    if (triple->origin) {
        m = fs_hash_rasqal_literal(ct, triple->origin, row);
        if (m == FS_RID_NULL) return 1;
    } else if (ct->op->graph_uri) {
        m = fs_hash_uri((char *)raptor_uri_as_string(ct->op->graph_uri));
    } else {
        /* m can be wildcard in the absence of GRAPH, WITH etc. */
        m = FS_RID_NULL;
    }
    s = fs_hash_rasqal_literal(ct, triple->subject, row);
    if (s == FS_RID_NULL) return 1;
    p = fs_hash_rasqal_literal(ct, triple->predicate, row);
    if (p == FS_RID_NULL) return 1;
    o = fs_hash_rasqal_literal(ct, triple->object, row);
    if (o == FS_RID_NULL) return 1;

    /* as long as s, p, and o are bound, we can add this quad */
    fs_rid_vector_append(vec[0], m);
    fs_rid_vector_append(vec[1], s);
    fs_rid_vector_append(vec[2], p);
    fs_rid_vector_append(vec[3], o);

    if (fs_rid_vector_length(vec[0]) > 999) {
        fsp_delete_quads_all(ct->link, vec);
        for (int s=0; s<4; s++) {
            fs_rid_vector_truncate(vec[s], 0);
        }
    }

    return 0;
}
Пример #2
0
/* ..._start and ..._finish share an int * count parameter
 * the same variable should be passed by reference both times */
int fs_import_stream_start(fsp_link *link, const char *model_uri, const char *mimetype, int has_o_index, int *count)
{
    if (inited == 0) {
        memset(&parse_data, 0, sizeof(parse_data));
        inited = 1;
    }

    parse_data.link = link;
    parse_data.segments = fsp_link_segments(link);
    parse_data.ext_count = count;

    for (int i=0; i<parse_data.segments; i++) {
        for (int j=0; j<RES_BUF_SIZE; j++) {
            lex_tmp[i][j] = malloc(RES_BUF_SIZE);
        }
    }

    memset(nodecache, 0, sizeof(nodecache));

    parse_data.quad_fn = g_strdup(FS_TMP_PATH "/importXXXXXX");
    parse_data.quad_fd = mkstemp(parse_data.quad_fn);
    if (parse_data.quad_fd < 0) {
        fs_error(LOG_ERR, "Cannot create tmp file “%s”", parse_data.quad_fn);
        return 1;
    }

    parse_data.muri = raptor_new_uri((unsigned char *) model_uri);

    parse_data.model = g_strdup(model_uri);
    parse_data.model_hash = fs_hash_uri(model_uri);
    parse_data.count_trip = 0;
    parse_data.count_err = 0;
    parse_data.last_count = 0;
    parse_data.has_o_index = has_o_index;

    /* store the model uri */
    buffer_res(link, parse_data.segments, parse_data.model_hash, parse_data.model, FS_RID_NULL, parse_data.dryrun);

    parse_data.parser = raptor_new_parser_for_content(NULL, mimetype, NULL, 0, (unsigned char *) parse_data.model);

    if (!parse_data.parser) {
        return 1;
    }

    /* use us as a vector for an indirect attack? no thanks */
    raptor_set_feature(parse_data.parser, RAPTOR_FEATURE_NO_NET, 0);

    raptor_set_fatal_error_handler(parse_data.parser, link, fatal_rdf_parser_error);
    raptor_set_error_handler(parse_data.parser, link, rdf_parser_error);

    raptor_set_statement_handler(parse_data.parser, &parse_data, store_stmt);
    raptor_set_graph_handler(parse_data.parser, &parse_data, graph_handler);

    raptor_start_parse(parse_data.parser, parse_data.muri);

    fs_hash_freshen(); /* blank nodes are unique per file */

    return 0;
}
Пример #3
0
fs_value fs_value_uri(const char *s)
{
    fs_value v = fs_value_blank();
    v.rid = fs_hash_uri(s);
    v.lex = (char *)s;
    v.valid = fs_valid_bit(FS_V_RID);
    v.attr = FS_RID_NULL;

    return v;
}
Пример #4
0
static fs_rid insert_uri(xmlctxt *ctxt)
{
  char *uri = ctxt->resource;
  if (!uri || uri[0] == '\0') {
    fs_error(LOG_ERR, "NULL URI inserted");
    return 0;
  }
  fs_rid r = fs_hash_uri(uri);
  insert_resource(ctxt, r, fs_c.empty, uri);

  return r;
}
Пример #5
0
void graph_handler(void *user_data, raptor_uri *graph)
{
    g_free(parse_data.model);
    if (graph == NULL) {
        parse_data.model = g_strdup((char *) raptor_uri_as_string(parse_data.muri));
    } else {
        parse_data.model = g_strdup((char *) raptor_uri_as_string(graph));
    }

    parse_data.model_hash = fs_hash_uri(parse_data.model);
    buffer_res(parse_data.link, parse_data.segments, parse_data.model_hash, parse_data.model, FS_RID_NULL, parse_data.dryrun);
}
Пример #6
0
fs_value fs_value_plain_with_dt(const char *s, const char *d)
{
    fs_value v = fs_value_blank();
    if (!d || *d == '\0') {
	v.attr = fs_c.empty;
    } else {
	v.attr = fs_hash_uri(d);
    }
    v.lex = (char *)s;

    return v;
}
Пример #7
0
int main(int argc, char *argv[])
{
  if (argc != 2) {
    fprintf(stderr, "%s revision %s\n", argv[0], FS_BACKEND_VER);
    fprintf(stderr, "Usage: %s <uri> | \"literal\"\n", argv[0]);
    exit(1);
  }

  char *string = argv[1];
  char lex[128], lang[128], type[128], uri[128];
  fs_rid rid;

#ifdef FS_MD5
  fs_hash_init(FS_HASH_MD5);
#endif
#ifdef FS_CRC64
  fs_hash_init(FS_HASH_CRC64);
#endif
#ifdef FS_UMAC
  fs_hash_init(FS_HASH_UMAC);
#endif

  if (sscanf(string, "\"%127[^\"]\"@%127s", lex, lang) == 2) {
    rid = fs_hash_literal(lex,fs_hash_literal(lang, 0));
  } else if (sscanf(string, "\"%127[^\"]\"^^%127s", lex, type) == 2) {
    rid = fs_hash_literal(lex,fs_hash_uri(type));
  } else if (sscanf(string, "\"%127[^\"]\"", lex) == 1) {
    rid = fs_hash_literal(lex, 0);
  } else if (sscanf(string, "<%127[^>]>", uri) == 1) {
    rid = fs_hash_uri(uri);
  } else {
    fprintf(stderr, "Couldn't recognise a URI or literal in string '%s'\n", string);
    exit(1);
  }

  printf("%016llX\n", rid);
}
Пример #8
0
static int insert_rasqal_triple(struct update_context *uc, rasqal_triple *triple, int row)
{
    fs_rid quad_buf[1][4];
    fs_resource res;
    if (triple->origin) {
        fs_resource_from_rasqal_literal(uc, triple->origin, &res, row);
        quad_buf[0][0] = fs_hash_rasqal_literal(uc, triple->origin, row);
    } else if (uc->op->graph_uri) {
        res.lex = (char *)raptor_uri_as_string(uc->op->graph_uri);
        res.attr = FS_RID_NULL;
        quad_buf[0][0] =
            fs_hash_uri((char *)raptor_uri_as_string(uc->op->graph_uri));
    } else {
        quad_buf[0][0] = fs_c.default_graph;
        res.lex = FS_DEFAULT_GRAPH;
        res.attr = FS_RID_NULL;
    }

    if (quad_buf[0][0] == fs_c.system_config)
        fsp_reload_acl_system(uc->link);

    if (!FS_IS_URI(quad_buf[0][0])) {
        return 1;
    }
    quad_buf[0][1] = fs_hash_rasqal_literal(uc, triple->subject, row);
    if (FS_IS_LITERAL(quad_buf[0][1])) {
        return 1;
    }
    quad_buf[0][2] = fs_hash_rasqal_literal(uc, triple->predicate, row);
    if (!FS_IS_URI(quad_buf[0][2])) {
        return 1;
    }
    quad_buf[0][3] = fs_hash_rasqal_literal(uc, triple->object, row);
    res.rid = quad_buf[0][0];
    if (res.lex) fsp_res_import(uc->link, FS_RID_SEGMENT(quad_buf[0][0], uc->segments), 1, &res);
    res.rid = quad_buf[0][1];
    fs_resource_from_rasqal_literal(uc, triple->subject, &res, 0);
    if (res.lex) fsp_res_import(uc->link, FS_RID_SEGMENT(quad_buf[0][1], uc->segments), 1, &res);
    res.rid = quad_buf[0][2];
    fs_resource_from_rasqal_literal(uc, triple->predicate, &res, 0);
    if (res.lex) fsp_res_import(uc->link, FS_RID_SEGMENT(quad_buf[0][2], uc->segments), 1, &res);
    res.rid = quad_buf[0][3];
    fs_resource_from_rasqal_literal(uc, triple->object, &res, 0);
    if (res.lex) fsp_res_import(uc->link, FS_RID_SEGMENT(quad_buf[0][3], uc->segments), 1, &res);
    fsp_quad_import(uc->link, FS_RID_SEGMENT(quad_buf[0][1], uc->segments), FS_BIND_BY_SUBJECT, 1, quad_buf);
//printf("I %016llx %016llx %016llx %016llx\n", quad_buf[0][0], quad_buf[0][1], quad_buf[0][2], quad_buf[0][3]);

    return 0;
}
Пример #9
0
static fs_rid insert_typed(xmlctxt *ctxt)
{
  char *text = ctxt->resource;
  fs_rid dt = fs_c.empty;
  if (ctxt->attr) {
    dt = fs_hash_uri(ctxt->attr);
    insert_resource(ctxt, dt, fs_c.empty, ctxt->attr);
  } else {
    fs_error(LOG_ERR, "NULL type URI inserted");
  }
  fs_rid r = fs_hash_literal(text, dt);
  insert_resource(ctxt, r, dt, text);

  return r;
}
Пример #10
0
void fs_resource_from_rasqal_literal(struct update_context *uctxt, rasqal_literal *l, fs_resource *res, int row)
{
    if (!l) {
        res->lex = "(null)";
        res->attr = FS_RID_NULL;

        return;
    }
    rasqal_literal_type type = l->type;
    if (type == RASQAL_LITERAL_VARIABLE) {
        /* right now you can't introduce new literals in INSERT, so it doesn't
         * matter */
        res->lex = NULL;
        res->attr = FS_RID_GONE;
    } else if (type == RASQAL_LITERAL_URI) {
	res->lex = (char *)raptor_uri_as_string(l->value.uri);
        res->attr = FS_RID_NULL;
    } else {
        res->lex = (char *)l->string;
        res->attr = 0;
        fs_resource ares;
        ares.lex = NULL;
        if (l->datatype) {
            res->attr = fs_hash_uri((char *)raptor_uri_as_string(l->datatype));
            ares.rid = res->attr;
            ares.lex = (char *)raptor_uri_as_string(l->datatype);
            ares.attr = FS_RID_NULL;
        } else if (l->language) {
            res->attr = fs_hash_literal(l->language, 0);
            ares.rid = res->attr;
            ares.lex = (char *)l->language;
            ares.attr = 0;
        }
        /* insert attribute resource if there is one */
        if (ares.lex) {
            fsp_res_import(uctxt->link, FS_RID_SEGMENT(ares.rid, uctxt->segments), 1, &ares);
        }
    }
}
Пример #11
0
int fs_clear(struct update_context *uc, char *graphuri)
{
    fs_rid_vector *mvec = fs_rid_vector_new(0);
    fs_rid mrid;
    if (graphuri) {
        mrid = fs_hash_uri(graphuri);
    } else {
        graphuri = FS_DEFAULT_GRAPH;
        mrid = fs_c.default_graph;
    }
    fs_rid_vector_append(mvec, mrid);

    int errors = 0;
    if (fsp_delete_model_all(uc->link, mvec)) {
        errors++;
        add_message(uc, g_strdup_printf("Error while trying to delete %s", graphuri), 1);
    } else {
        add_message(uc, g_strdup_printf("Deleted <%s>", graphuri), 1);
    }
    fs_rid_vector_free(mvec);

    return errors;
}
Пример #12
0
int fs_copy(struct update_context *uc, char *from, char *to)
{
    fs_rid_vector *mvec = fs_rid_vector_new(0);
    fs_rid_vector *empty = fs_rid_vector_new(0);

    fs_rid fromrid, torid;
    if (from) {
        fromrid = fs_hash_uri(from);
    } else {
        from = FS_DEFAULT_GRAPH;
        fromrid = fs_c.default_graph;
    }
    if (to) {
        torid = fs_hash_uri(to);
    } else {
        to = FS_DEFAULT_GRAPH;
        torid = fs_c.default_graph;
    }

    if (fromrid == torid) {
        /*don't need to do anything */
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1);
        add_message(uc, "0 triples added, 0 removed", 0);

        return 0;
    }

    fs_rid_vector_append(mvec, fromrid);

    /* search for all the triples in from */
    fs_rid_vector **results;
    fs_rid_vector *slot[4] = { mvec, empty, empty, empty };

    /* see if there's any data in <from> */
    fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT,
             slot, &results, -1, 1);
    if (!results || results[0]->length == 0) {
        if (results) {
            fs_rid_vector_free(results[0]);
            free(results);
        }
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("<%s> is empty, not copying", from), 1);

        return 1;
    }

    fs_rid_vector_free(results[0]);
    free(results);

    /* get the contents of <from> */
    fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT,
             slot, &results, -1, -1);

    /* map old bnodes to new ones */
    map_bnodes(uc, results[0]);
    map_bnodes(uc, results[1]);
    map_bnodes(uc, results[2]);

    /* delete <to> */
    mvec->data[0] = torid;
    if (fsp_delete_model_all(uc->link, mvec)) {
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("Error while trying to delete %s", to), 1);

        return 1;
    }

    fs_rid_vector_free(mvec);
    fs_rid_vector_free(empty);

    /* insert <to> */
    fs_resource tores;
    tores.lex = to;
    tores.attr= FS_RID_NULL;
    tores.rid = torid;
    fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores);
    
    insert_triples(uc, torid, results[0], results[1], results[2]);

    add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1);
    add_message(uc, g_strdup_printf("%d triples added, ?? removed", results[0]->length), 1);

    for (int i=0; i<3; i++) {
        fs_rid_vector_free(results[i]);
    }
    free(results);

    return 0;
}
Пример #13
0
int fs_add(struct update_context *uc, char *from, char *to)
{
    fs_rid_vector *mvec = fs_rid_vector_new(0);
    fs_rid_vector *empty = fs_rid_vector_new(0);

    fs_rid fromrid, torid;
    if (from) {
        fromrid = fs_hash_uri(from);
    } else {
        from = FS_DEFAULT_GRAPH;
        fromrid = fs_c.default_graph;
    }
    if (to) {
        torid = fs_hash_uri(to);
    } else {
        to = FS_DEFAULT_GRAPH;
        torid = fs_c.default_graph;
    }

    if (fromrid == torid) {
        /*don't need to do anything */
        add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1);
        add_message(uc, "0 triples added, 0 removed", 0);

        return 0;
    }

    fs_rid_vector_append(mvec, fromrid);

    int errors = 0;

    /* search for all the triples in from */
    fs_rid_vector **results;
    fs_rid_vector *slot[4] = { mvec, empty, empty, empty };
    fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT,
             slot, &results, -1, -1);
    fs_rid_vector_free(mvec);
    fs_rid_vector_free(empty);

    if (!results || results[0]->length == 0) {
        /* there's nothing to add */
        if (results) {
            for (int i=0; i<3; i++) {
                fs_rid_vector_free(results[i]);
            }
            free(results);
        }
        add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1);
        add_message(uc, "0 triples added, 0 removed", 0);

        return 0;
    }

    map_bnodes(uc, results[0]);
    map_bnodes(uc, results[1]);
    map_bnodes(uc, results[2]);

    fs_resource tores;
    tores.lex = to;
    tores.attr= FS_RID_NULL;
    tores.rid = torid;
    fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores);
    
    insert_triples(uc, torid, results[0], results[1], results[2]);

    add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1);
    add_message(uc, g_strdup_printf("%d triples added, 0 removed", results[0]->length), 1);

    for (int i=0; i<3; i++) {
        fs_rid_vector_free(results[i]);
    }
    free(results);

    return errors;
}
Пример #14
0
fs_rid fs_hash_rasqal_literal(struct update_context *uc, rasqal_literal *l, int row)
{
    if (!l) return FS_RID_NULL;

    if (l->type == RASQAL_LITERAL_VARIABLE) {
        if (uc->q) {
            return fs_binding_get_val(uc->q->bb[0], l->value.variable, row, NULL);
        }
        fs_error(LOG_ERR, "no variables bound");

        return FS_RID_NULL;
    }

    rasqal_literal_type type = rasqal_literal_get_rdf_term_type(l);
    switch (type) {
    case RASQAL_LITERAL_URI:
        return fs_hash_uri((char *)raptor_uri_as_string(l->value.uri));
    
    case RASQAL_LITERAL_UNKNOWN:
    case RASQAL_LITERAL_STRING:
    case RASQAL_LITERAL_XSD_STRING: {
        fs_rid attr = 0;
        if (l->datatype) {
            attr = fs_hash_uri((char *)raptor_uri_as_string(l->datatype));
        } else if (l->language) {
            /* lang tags are normalised to upper case internally */
            char *lang = g_ascii_strup((char *)l->language, -1);
            attr = fs_hash_literal(lang, 0);
            g_free(lang);
        }

        return fs_hash_literal((char *)rasqal_literal_as_string(l), attr);
    }

    case RASQAL_LITERAL_BLANK: {
        raptor_term_blank_value bnode;
        bnode.string = (unsigned char *)rasqal_literal_as_string(l);
        bnode.string_len = strlen((char *)bnode.string);

        return fs_bnode_id(uc->link, bnode);
    }

    case RASQAL_LITERAL_VARIABLE:
    case RASQAL_LITERAL_QNAME:
    case RASQAL_LITERAL_PATTERN:
    case RASQAL_LITERAL_BOOLEAN:
    case RASQAL_LITERAL_INTEGER:
    case RASQAL_LITERAL_INTEGER_SUBTYPE:
    case RASQAL_LITERAL_DECIMAL:
    case RASQAL_LITERAL_FLOAT:
    case RASQAL_LITERAL_DOUBLE:
    case RASQAL_LITERAL_DATETIME:
    case RASQAL_LITERAL_UDT:
#if RASQAL_VERSION >= 929
    case RASQAL_LITERAL_DATE:
#endif
        break;
    }
    fs_error(LOG_ERR, "bad rasqal literal (type %d)", type);

    return FS_RID_NULL;
}
Пример #15
0
static void rdf_parser_statement_handler(void* user_data, const raptor_statement* st) {
   raptor_term *g, *s, *p, *o;
  
   g = st->graph;
   s = st->subject;
   p = st->predicate;
   o = st->object;

   rdf_parser_internal *parser_obj = (rdf_parser_internal *) user_data;


   if (parser_obj->counter == 0)
       parser_obj->partial_parse_time = g_timer_new();
    
   parser_obj->counter++;

   /* init index logic */
   unsigned char *gc = NULL;
   fs_rid g_rid;
   if (parser_obj->trig) {
       gc = raptor_uri_as_string(g->value.uri);
       g_rid = fs_hash_uri((const char *)gc);
   } else {
       g_rid = parser_obj->g_rid ; 
       gc = parser_obj->model;
   }
   

   unsigned char *sc = NULL;
   if (s->type == RAPTOR_TERM_TYPE_URI)
        sc = raptor_uri_as_string(s->value.uri);
   else {
        sc = (unsigned char *) g_strdup_printf("bnode:b%s%s",s->value.blank.string+5,parser_obj->bnode_ts);
   }
 
   unsigned char *pc = raptor_uri_as_string(p->value.uri);
   unsigned char *oc = NULL;
   unsigned char *o_lang = NULL;
   unsigned char *o_datatype = NULL;
   fs_rid s_rid = fs_hash_uri((const char *) sc);
   fs_rid p_rid = fs_hash_uri((const char *) pc);
   fs_rid o_rid = 0x0;
   if (o->type == RAPTOR_TERM_TYPE_URI) {
       oc = raptor_uri_as_string(o->value.uri);
       o_rid = fs_hash_uri((const char *) oc);
   } else if (o->type == RAPTOR_TERM_TYPE_LITERAL) {
        oc = o->value.literal.string;
        if (o->value.literal.datatype) {
            o_datatype = raptor_uri_as_string(o->value.literal.datatype);
            o_rid = fs_hash_literal((const char *) oc,fs_hash_uri((const char *) o_datatype));
        } else if (o->value.literal.language != NULL) {
            o_lang = o->value.literal.language;
            o_rid = fs_hash_literal((const char *) oc, fs_hash_uri((const char *) o_lang));
        } else {
            o_rid = fs_hash_literal((const char *) oc, FS_RID_NULL);
        }
        oc =  raptor_term_to_string(o);
   } else if (o->type == RAPTOR_TERM_TYPE_BLANK) {
        oc = (unsigned char *) g_strdup_printf("bnode:b%s%s",o->value.blank.string+5,parser_obj->bnode_ts);
        o_rid = fs_hash_uri((const char *) oc); 
   }
   
   int seg_id = s_rid % SEGMENTS;
   fs_rid *quad = rdf_parser_new_quad(g_rid,s_rid,p_rid,o_rid);
   g_ptr_array_add(parser_obj->quads[seg_id],quad);

    
   /* saves hashes into disk hash TODO looks for optimistions */
   rdf_kb *kb = parser_obj->kb;
   fs_rid hashes[4] = {g_rid,s_rid,p_rid,o_rid};
   //log_debug("%llx %llx %llx %llx", hashes[0], hashes[1], hashes[2], hashes[3]);

   //char tmp_rid[16+1];
   char tmp_rid[17];
   unsigned char *strings[4] = {gc,sc,pc,oc};
   int assigned_hash=0;
   for(int i=0;i<4;i++) {
       memset(tmp_rid,0,16);
       sprintf(tmp_rid,"%llx",hashes[i]);
       assigned_hash = hashes[i] % HASHES_NUM;
       //if (i > 0 && strlen(tmp_rid) < 15 )
            //printf("ERRRRO NUL %s %s\n", tmp_rid, strings[i]);
       kcdbset(kb->hash_stores[assigned_hash],(const char *) tmp_rid, 16 ,(const char *) strings[i], strlen((const char *) strings[i]));
   }
   /* end of saving hashes into disk */


   if (!(parser_obj->counter % STAT_BATCH)) {
        double kt = parser_obj->counter/1e3;
        log_debug("parsing progress %.0lf kT %.2lf kT/s %.2lf kT/s",kt,kt/g_timer_elapsed(parser_obj->global_parse_time,NULL), 
               (STAT_BATCH/1e3)/g_timer_elapsed(parser_obj->partial_parse_time,NULL));
        g_timer_start(parser_obj->partial_parse_time);
   }
}
Пример #16
0
static void store_stmt(void *user_data, const raptor_statement * statement)
{
    fs_parse_stuff *data = (fs_parse_stuff *) user_data;
    char *subj = (char *) raptor_uri_as_string((raptor_uri *)
					       statement->subject);
    char *pred;
    char *obj;
    fs_rid m, s, p, o;
    char tmpp[512];

    m = data->model_hash;

    if (statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
        s = fs_bnode_id(data->link, statement->subject);
        subj = (char *) statement->subject;
    } else {
	s = fs_hash_uri(subj);
    }

    if (statement->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
        sprintf(tmpp, MEMBER_PREFIX "%d", *((int *)statement->predicate));
        pred = tmpp;
    } else {
        pred = (char *) raptor_uri_as_string((raptor_uri *)
					       statement->predicate);
    }
    p = fs_hash_uri(pred);

    fs_rid attr = fs_c.empty;
    if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL ||
	statement->object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) {
	obj = (char *) statement->object;
	if (statement->object_literal_language) {
	    char *langtag = (char *)statement->object_literal_language;
            for (char *pos = langtag; *pos; pos++) {
                if (islower(*pos)) {
                    *pos = toupper(*pos);
                }
            }
	    attr = fs_hash_literal(langtag, 0);
	    buffer_res(data->link, data->segments, attr, langtag, fs_c.empty, data->dryrun);
	} else if (raptor_uri_as_string(statement->object_literal_datatype)) {
	    char *dt = (char *)raptor_uri_as_string(statement->object_literal_datatype);
	    
	    attr = fs_hash_uri(dt);
	    buffer_res(data->link, data->segments, attr, dt, FS_RID_NULL, data->dryrun);
	}
	o = fs_hash_literal(obj, attr);
    } else if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
	o = fs_bnode_id(data->link, statement->object);
	obj = (char *) statement->object;
    } else {
	obj = (char *) raptor_uri_as_string((raptor_uri *) statement->
					    object);
        attr = FS_RID_NULL;
	o = fs_hash_uri(obj);
    }

    buffer_res(data->link, data->segments, s, subj, FS_RID_NULL, data->dryrun);
    buffer_res(data->link, data->segments, p, pred, FS_RID_NULL, data->dryrun);
    buffer_res(data->link, data->segments, o, obj, attr, data->dryrun);

    fs_rid tbuf[4] = { m, s, p, o };
retry_write:
    if (write(data->quad_fd, tbuf, sizeof(tbuf))  == -1) {
        fs_error(LOG_ERR, "failed to buffer quad to fd %d (0x%x): %s", data->quad_fd, data->quad_fd, strerror(errno));
        if (errno == EAGAIN || errno == EINTR || errno == ENOSPC) {
            sleep(5);
            goto retry_write;
        }
    }
    if (data->verbosity > 2) {
        fprintf(stderr, "%016llx %016llx %016llx %016llx\n", m, s, p, o);
    }

    data->count_trip++;
    total_triples_parsed++;
    if (data->verbosity && total_triples_parsed % 10000 == 0) {
	printf("Pass 1, processed %d triples\r", total_triples_parsed);
	fflush(stdout);
    }
    if (total_triples_parsed == FS_CHUNK_SIZE) {
	if (data->verbosity) printf("Pass 1, processed %d triples (%d)\n", FS_CHUNK_SIZE, data->count_trip);
	*(data->ext_count) += process_quads(data);
	data->last_count = data->count_trip;
	total_triples_parsed = 0;
	gettimeofday(&then_last, 0);
    }
}
Пример #17
0
int fs_import(fsp_link *link, const char *model_uri, char *resource_uri,
	      const char *format, int verbosity, int dryrun, int has_o_index,
              FILE *msg, int *count)
{
    raptor_parser *rdf_parser = NULL;
    raptor_uri ruri = NULL;
    int ret = 0;

    const int segments = fsp_link_segments(link);

    parse_data.ext_count = count;
    if (!inited) {
        inited = 1;
        parse_data.link = link;
        parse_data.segments = fsp_link_segments(link);

        for (int i=0; i<parse_data.segments; i++) {
            for (int j=0; j<RES_BUF_SIZE; j++) {
                lex_tmp[i][j] = malloc(RES_BUF_SIZE);
            }
        }

        memset(nodecache, 0, sizeof(nodecache));

        parse_data.quad_fn = g_strdup(FS_TMP_PATH "/importXXXXXX");
        parse_data.quad_fd = mkstemp(parse_data.quad_fn);
        if (parse_data.quad_fd < 0) {
            fs_error(LOG_ERR, "Cannot create tmp file “%s”", parse_data.quad_fn);
            return 1;
        }
        gettimeofday(&then_last, 0);
    }

    parse_data.verbosity = verbosity;
    parse_data.model = g_strdup(model_uri);
    parse_data.model_hash = fs_hash_uri(model_uri);
    parse_data.count_trip = 0;
    parse_data.last_count = 0;
    parse_data.dryrun = dryrun;
    parse_data.has_o_index = has_o_index;

    /* store the model uri */
    buffer_res(link, segments, parse_data.model_hash, parse_data.model, FS_RID_NULL, dryrun);

    if (strcmp(format, "auto")) {
       rdf_parser = raptor_new_parser(format);
    } else if (strstr(resource_uri, ".n3") || strstr(resource_uri, ".ttl")) {
        rdf_parser = raptor_new_parser("turtle");
    } else if (strstr(resource_uri, ".nt")) {
        rdf_parser = raptor_new_parser("ntriples");
    } else {
        rdf_parser = raptor_new_parser("rdfxml");
    }
    if (!rdf_parser) {
        fs_error(LOG_ERR, "failed to create RDF parser");
        return 1;
    }

    raptor_set_statement_handler(rdf_parser, &parse_data, store_stmt);
    raptor_set_graph_handler(rdf_parser, &parse_data, graph_handler);
    ruri = raptor_new_uri((unsigned char *) resource_uri);
    parse_data.muri = raptor_new_uri((unsigned char *) model_uri);

    if (raptor_parse_uri(rdf_parser, ruri, parse_data.muri)) {
        fs_error(LOG_ERR, "failed to parse file “%s”", resource_uri);
        ret++;
    }
    if (verbosity) {
        printf("Pass 1, processed %d triples (%d)\n", total_triples_parsed, parse_data.count_trip);
    }

    raptor_free_parser(rdf_parser);
    raptor_free_uri(ruri);
    raptor_free_uri(parse_data.muri);
    g_free(parse_data.model);
    fs_hash_freshen(); /* blank nodes are unique per file */

    return ret;
}
Пример #18
0
int main(int argc, char *argv[])
{
    int verbosity = 0;
    int dryrun = 0;
    char *password = NULL;
    char *format = "auto";
    FILE *msg = stderr;
    char *optstring = "am:M:vnf:";
    int c, opt_index = 0, help = 0;
    int files = 0, adding = 0;
    char *kb_name = NULL;
    char *model[argc], *uri[argc];
    char *model_default = NULL;

    password = fsp_argv_password(&argc, argv);

    static struct option long_options[] = {
        { "add", 0, 0, 'a' },
        { "model", 1, 0, 'm' },
        { "model-default", 1, 0, 'M' },
        { "verbose", 0, 0, 'v' },
        { "dryrun", 0, 0, 'n' },
        { "no-resources", 0, 0, 'R' },
        { "no-quads", 0, 0, 'Q' },
        { "format", 1, 0, 'f' },
        { "help", 0, 0, 'h' },
        { "version", 0, 0, 'V' },
        { 0, 0, 0, 0 }
    };

    for (int i= 0; i < argc; ++i) {
      model[i] = NULL;
    }

    int help_return = 1;

    while ((c = getopt_long (argc, argv, optstring, long_options, &opt_index)) != -1) {
        if (c == 'm') {
	    model[files++] = optarg;
        } else if (c == 'M') {
            model_default = optarg;
        } else if (c == 'v') {
	    verbosity++;
        } else if (c == 'a') {
	    adding = 1;
        } else if (c == 'n') {
	    dryrun |= FS_DRYRUN_DELETE | FS_DRYRUN_RESOURCES | FS_DRYRUN_QUADS;
	} else if (c == 'R') {
	    dryrun |= FS_DRYRUN_RESOURCES;
	} else if (c == 'Q') {
	    dryrun |= FS_DRYRUN_QUADS;
        } else if (c == 'f') {
            format = optarg;
        } else if (c == 'h') {
	    help = 1;
            help_return = 0;
        } else if (c == 'V') {
            printf("%s, built for 4store %s\n", argv[0], GIT_REV);
            exit(0);
        } else {
	    help = 1;
        }
    }

    if (verbosity > 0) {
	if (dryrun & FS_DRYRUN_DELETE) {
	    printf("warning: not deleting old model\n");
	}
	if (dryrun & FS_DRYRUN_RESOURCES) {
	    printf("warning: not importing resource nodes\n");
	}
	if (dryrun & FS_DRYRUN_QUADS) {
	    printf("warning: not importing quad graph\n");
	}
    }

    files = 0;
    for (int k = optind; k < argc; ++k) {
        if (!kb_name) {
            kb_name = argv[k];
        } else {
	    if (strchr(argv[k], ':')) {
		uri[files] = g_strdup(argv[k]);
	    } else {
		uri[files] = (char *)raptor_uri_filename_to_uri_string(argv[k]);
	    }
            if (!model[files]) {
                if (!model_default) {
                    model[files] = uri[files];
                } else {
                    model[files] = model_default;
                }
            }
            files++;
        }
    }

    raptor_world *rw = raptor_new_world();
    if (help || !kb_name || files == 0) {
        fprintf(stdout, "%s revision %s\n", argv[0], FS_FRONTEND_VER);
        fprintf(stdout, "Usage: %s <kbname> <rdf file/URI> ...\n", argv[0]);
        fprintf(stdout, " -v --verbose   increase verbosity (can repeat)\n");
        fprintf(stdout, " -a --add       add data to models instead of replacing\n");
        fprintf(stdout, " -m --model     specify a model URI for the next RDF file\n");
        fprintf(stdout, " -M --model-default specify a model URI for all RDF files\n");
        fprintf(stdout, " -f --format    specify an RDF syntax for the import\n");
        fprintf(stdout, "\n   available formats are:\n");

        for (unsigned int i=0; 1; i++) {
            const raptor_syntax_description *desc =
                    raptor_world_get_parser_description(rw, i);
            if (!desc) {
                break;
            }
            fprintf(stdout, "    %12s - %s\n", desc->names[0], desc->label);
        }
        exit(help_return);
    }

    fsp_syslog_enable();

    fsplink = fsp_open_link(kb_name, password, FS_OPEN_HINT_RW);

    if (!fsplink) {
      fs_error (LOG_ERR, "couldn't connect to “%s”", kb_name);
      exit(2);
    }

    const char *features = fsp_link_features(fsplink);
    int has_o_index = !(strstr(features, "no-o-index")); /* tweak */

    fs_hash_init(fsp_hash_type(fsplink));
    const int segments = fsp_link_segments(fsplink);
    int total_triples = 0;

    fs_import_timing timing[segments];

    for (int seg = 0; seg < segments; seg++) {
        fsp_get_import_times(fsplink, seg, &timing[seg]);
    }

    gettimeofday(&then, 0);

    if (fsp_start_import_all(fsplink)) {
	fs_error(LOG_ERR, "aborting import");

	exit(3);
    }

#if 0
printf("press enter\n");
char foo;
read(0, &foo, 1);
#endif

    fs_rid_vector *mvec = fs_rid_vector_new(0);

    for (int f= 0; f < files; ++f) {
        fs_rid muri = fs_hash_uri(model[f]);
        fs_rid_vector_append(mvec, muri);
    }
    if (!adding) {
        if (verbosity) {
	    printf("removing old data\n");
	    fflush(stdout);
        }
        if (!(dryrun & FS_DRYRUN_DELETE)) {
	    if (fsp_delete_model_all(fsplink, mvec)) {
	        fs_error(LOG_ERR, "model delete failed");
	        return 1;
	    }
	    for (int i=0; i<mvec->length; i++) {
		if (mvec->data[i] == fs_c.system_config) {
		    fs_import_reread_config();
		}
	    }
        }
        fsp_new_model_all(fsplink, mvec);
    }

    fs_rid_vector_free(mvec);

    gettimeofday(&then_last, 0);
    for (int f = 0; f < files; ++f) {
	if (verbosity) {
            printf("Reading <%s>\n", uri[f]);
            if (strcmp(uri[f], model[f])) {
                printf("   into <%s>\n", model[f]);
            }
	    fflush(stdout);
        }

        fs_import(fsplink, model[f], uri[f], format, verbosity, dryrun, has_o_index, msg, &total_triples);
	if (verbosity) {
	    fflush(stdout);
        }
    }
    double sthen = fs_time();
    int ret = fs_import_commit(fsplink, verbosity, dryrun, has_o_index, msg, &total_triples);

    if (verbosity > 0) {
	printf("Updating index\n");
        fflush(stdout);
    }
    fsp_stop_import_all(fsplink);
    if (verbosity > 0) {
        printf("Index update took %f seconds\n", fs_time()-sthen);
    }

    if (!ret) {
        gettimeofday(&now, 0);
        double diff = (now.tv_sec - then.tv_sec) +
                        (now.tv_usec - then.tv_usec) * 0.000001;
        if (verbosity && total_triples > 0) {
	    printf("Imported %d triples, average %d triples/s\n", total_triples,
		     (int)((double)total_triples/diff));
            fflush(stdout);
        }
    }

    if (verbosity > 1) {
        printf("seg add_q\tadd_r\t\tcommit_q\tcommit_r\tremove\t\trebuild\t\twrite\n");
        long long *tics = fsp_profile_write(fsplink);

        for (int seg = 0; seg < segments; seg++) {
            fs_import_timing newtimes;
            fsp_get_import_times(fsplink, seg, &newtimes);

	    printf("%2d: %f\t%f\t%f\t%f\t%f\t%f\t%f\n", seg,
                   newtimes.add_s - timing[seg].add_s,
	           newtimes.add_r - timing[seg].add_r,
	           newtimes.commit_q - timing[seg].commit_q,
                   newtimes.commit_r - timing[seg].commit_r,
                   newtimes.remove - timing[seg].remove,
		   newtimes.rebuild - timing[seg].rebuild,
		   tics[seg] * 0.001);
	}
    }

    fsp_close_link(fsplink);
    raptor_free_world(rw);

    return 0;
}
Пример #19
0
Файл: hash.c Проект: rafl/4store
void fs_hash_init(fsp_hash_enum type)
{
    switch (type) {
    case FS_HASH_MD5:
    case FS_HASH_CRC64:
	fs_error(LOG_CRIT, "Unsuported backend hash function, exiting");
	exit(4);
	break;
    case FS_HASH_UMAC:
	break;
    case FS_HASH_UNKNOWN:
	fs_error(LOG_CRIT, "Unknown backend hash function, exiting");
	exit(4);
	break;
    }

    bnids = g_hash_table_new_full(g_str_hash, g_str_equal, bnhash_destroy,
	    NULL);

    atexit(fs_hash_fini);

    fs_c.empty = 0LL;
    fs_c.xsd_string = fs_hash_uri(XSD_STRING);
    fs_c.xsd_integer = fs_hash_uri(XSD_INTEGER);
    fs_c.xsd_float = fs_hash_uri(XSD_FLOAT);
    fs_c.xsd_double = fs_hash_uri(XSD_DOUBLE);
    fs_c.xsd_decimal = fs_hash_uri(XSD_DECIMAL);
    fs_c.xsd_boolean = fs_hash_uri(XSD_BOOLEAN);
    fs_c.xsd_datetime = fs_hash_uri(XSD_DATETIME);
    fs_c.xsd_pinteger = fs_hash_uri(XSD_NAMESPACE "positiveInteger");
    fs_c.xsd_ninteger = fs_hash_uri(XSD_NAMESPACE "negativeInteger");
    fs_c.xsd_npinteger = fs_hash_uri(XSD_NAMESPACE "nonPositiveInteger");
    fs_c.xsd_nninteger = fs_hash_uri(XSD_NAMESPACE "nonNegativeInteger");
    fs_c.xsd_long = fs_hash_uri(XSD_NAMESPACE "long");
    fs_c.xsd_int = fs_hash_uri(XSD_NAMESPACE "int");
    fs_c.xsd_short = fs_hash_uri(XSD_NAMESPACE "short");
    fs_c.xsd_byte = fs_hash_uri(XSD_NAMESPACE "byte");
    fs_c.xsd_ulong = fs_hash_uri(XSD_NAMESPACE "unsignedLong");
    fs_c.xsd_uint = fs_hash_uri(XSD_NAMESPACE "unsignedInt");
    fs_c.xsd_ushort = fs_hash_uri(XSD_NAMESPACE "unsignedShort");
    fs_c.xsd_ubyte = fs_hash_uri(XSD_NAMESPACE "unsignedByte");
    fs_c.lang_en = fs_hash_literal("en", 0);
    fs_c.lang_fr = fs_hash_literal("fr", 0);
    fs_c.lang_de = fs_hash_literal("de", 0);
    fs_c.lang_es = fs_hash_literal("es", 0);
    fs_c.rdf_type = fs_hash_uri(RDF_NAMESPACE "type");
    fs_c.default_graph = fs_hash_uri(FS_DEFAULT_GRAPH);
    fs_c.system_config = fs_hash_uri(FS_SYSTEM_CONFIG);
    fs_c.rdfs_label = fs_hash_uri(RDFS_LABEL);
    fs_c.fs_text_index = fs_hash_uri(FS_TEXT_INDEX);
    fs_c.fs_token = fs_hash_uri(FS_TEXT_TOKEN);
    fs_c.fs_dmetaphone = fs_hash_uri(FS_TEXT_DMETAPHONE);
    fs_c.fs_stem = fs_hash_uri(FS_TEXT_STEM);
}