示例#1
0
文件: hash.c 项目: leifj/4store
/*
 * Initialise the hashing layer for the requested backend hash type.
 *
 * FS_HASH_MD5, FS_HASH_CRC64 and FS_HASH_UNKNOWN are rejected: a critical
 * error is logged and the process exits with status 4.  Any other value
 * (FS_HASH_UMAC in particular) proceeds to:
 *   - create the `bnids` string-keyed hash table,
 *   - register fs_hash_fini() to run at process exit,
 *   - fill the global `fs_c` table with the RIDs of well-known URIs and
 *     language tags.
 */
void fs_hash_init(fsp_hash_enum type)
{
    if (type == FS_HASH_MD5 || type == FS_HASH_CRC64) {
        fs_error(LOG_CRIT, "Unsuported backend hash function, exiting");
        exit(4);
    }
    if (type == FS_HASH_UNKNOWN) {
        fs_error(LOG_CRIT, "Unknown backend hash function, exiting");
        exit(4);
    }

    /* Keys are released through bnhash_destroy; values have no destructor. */
    bnids = g_hash_table_new_full(g_str_hash, g_str_equal,
                                  bnhash_destroy, NULL);

    atexit(fs_hash_fini);

    fs_c.empty = 0LL;

    /* XSD core datatypes */
    fs_c.xsd_string = fs_hash_uri(XSD_STRING);
    fs_c.xsd_integer = fs_hash_uri(XSD_INTEGER);
    fs_c.xsd_float = fs_hash_uri(XSD_FLOAT);
    fs_c.xsd_double = fs_hash_uri(XSD_DOUBLE);
    fs_c.xsd_decimal = fs_hash_uri(XSD_DECIMAL);
    fs_c.xsd_boolean = fs_hash_uri(XSD_BOOLEAN);
    fs_c.xsd_datetime = fs_hash_uri(XSD_DATETIME);
    fs_c.xsd_date = fs_hash_uri(XSD_DATE);

    /* XSD integer subtypes */
    fs_c.xsd_pinteger = fs_hash_uri(XSD_NAMESPACE "positiveInteger");
    fs_c.xsd_ninteger = fs_hash_uri(XSD_NAMESPACE "negativeInteger");
    fs_c.xsd_npinteger = fs_hash_uri(XSD_NAMESPACE "nonPositiveInteger");
    fs_c.xsd_nninteger = fs_hash_uri(XSD_NAMESPACE "nonNegativeInteger");
    fs_c.xsd_long = fs_hash_uri(XSD_NAMESPACE "long");
    fs_c.xsd_int = fs_hash_uri(XSD_NAMESPACE "int");
    fs_c.xsd_short = fs_hash_uri(XSD_NAMESPACE "short");
    fs_c.xsd_byte = fs_hash_uri(XSD_NAMESPACE "byte");
    fs_c.xsd_ulong = fs_hash_uri(XSD_NAMESPACE "unsignedLong");
    fs_c.xsd_uint = fs_hash_uri(XSD_NAMESPACE "unsignedInt");
    fs_c.xsd_ushort = fs_hash_uri(XSD_NAMESPACE "unsignedShort");
    fs_c.xsd_ubyte = fs_hash_uri(XSD_NAMESPACE "unsignedByte");

    /* Language tags, hashed as plain literals */
    fs_c.lang_en = fs_hash_literal("en", 0);
    fs_c.lang_fr = fs_hash_literal("fr", 0);
    fs_c.lang_de = fs_hash_literal("de", 0);
    fs_c.lang_es = fs_hash_literal("es", 0);

    /* RDF / RDFS / store-specific URIs */
    fs_c.rdf_type = fs_hash_uri(RDF_NAMESPACE "type");
    fs_c.default_graph = fs_hash_uri(FS_DEFAULT_GRAPH);
    fs_c.system_config = fs_hash_uri(FS_SYSTEM_CONFIG);
    fs_c.rdfs_label = fs_hash_uri(RDFS_LABEL);

    /* Text indexing */
    fs_c.fs_text_index = fs_hash_uri(FS_TEXT_INDEX);
    fs_c.fs_token = fs_hash_uri(FS_TEXT_TOKEN);
    fs_c.fs_dmetaphone = fs_hash_uri(FS_TEXT_DMETAPHONE);
    fs_c.fs_stem = fs_hash_uri(FS_TEXT_STEM);

    /* Access control */
    fs_c.fs_acl_admin = fs_hash_uri(FS_ACL_ADMIN);
    fs_c.fs_acl_access_by = fs_hash_uri(FS_ACL_ONLY_ACCESS_BY);
    fs_c.fs_acl_default_admin = fs_hash_literal(FS_ACL_DEFAULT_ADMIN, 0);
}
示例#2
0
/*
 * Insert a plain literal taken from the parser context and return its RID.
 *
 * ctxt->resource is the lexical form (NULL is treated as the empty string)
 * and ctxt->attr, when set, is hashed as the literal's language attribute
 * and inserted as a resource of its own before the literal itself.
 */
static fs_rid insert_plain(xmlctxt *ctxt)
{
  /* a missing resource stands for the empty string */
  char *lexical = ctxt->resource ? ctxt->resource : "";
  fs_rid lang_rid = fs_c.empty;

  if (ctxt->attr) {
    lang_rid = fs_hash_literal(ctxt->attr, fs_c.empty);
    insert_resource(ctxt, lang_rid, fs_c.empty, ctxt->attr);
  }

  fs_rid literal_rid = fs_hash_literal(lexical, lang_rid);
  insert_resource(ctxt, literal_rid, lang_rid, lexical);

  return literal_rid;
}
示例#3
0
/*
 * Build a plain-literal value with lexical form `s` and language tag `l`.
 *
 * A NULL or empty `l` yields the empty attribute (fs_c.empty); otherwise the
 * attribute is the literal hash of the tag.  `s` is stored as-is (not
 * copied), so the caller must keep it alive for as long as the value is used.
 */
fs_value fs_value_plain_with_lang(const char *s, const char *l)
{
    fs_value result = fs_value_blank();
    const int has_lang = (l && *l != '\0');

    if (has_lang) {
        result.attr = fs_hash_literal(l, 0);
    } else {
        result.attr = fs_c.empty;
    }
    result.lex = (char *)s;

    return result;
}
示例#4
0
文件: 4s-rid.c 项目: nakao/4store
/*
 * Command-line helper: print the RID (as 16 upper-case hex digits) of the
 * single URI or literal given as argv[1].
 *
 * Accepted forms, tried in this order:
 *   "lex"@lang      language-tagged literal
 *   "lex"^^type     typed literal
 *   "lex"           plain literal
 *   <uri>           URI
 * Every captured field is limited to 127 characters.
 */
int main(int argc, char *argv[])
{
  if (argc != 2) {
    fprintf(stderr, "%s revision %s\n", argv[0], FS_BACKEND_VER);
    fprintf(stderr, "Usage: %s <uri> | \"literal\"\n", argv[0]);
    exit(1);
  }

  const char *input = argv[1];
  char lex[128];
  char lang[128];
  char type[128];
  char uri[128];
  fs_rid rid;

  /* initialise the hash layer for whichever backend was compiled in */
#ifdef FS_MD5
  fs_hash_init(FS_HASH_MD5);
#endif
#ifdef FS_CRC64
  fs_hash_init(FS_HASH_CRC64);
#endif
#ifdef FS_UMAC
  fs_hash_init(FS_HASH_UMAC);
#endif

  if (sscanf(input, "\"%127[^\"]\"@%127s", lex, lang) == 2) {
    fs_rid lang_rid = fs_hash_literal(lang, 0);
    rid = fs_hash_literal(lex, lang_rid);
  } else if (sscanf(input, "\"%127[^\"]\"^^%127s", lex, type) == 2) {
    fs_rid type_rid = fs_hash_uri(type);
    rid = fs_hash_literal(lex, type_rid);
  } else if (sscanf(input, "\"%127[^\"]\"", lex) == 1) {
    rid = fs_hash_literal(lex, 0);
  } else if (sscanf(input, "<%127[^>]>", uri) == 1) {
    rid = fs_hash_uri(uri);
  } else {
    fprintf(stderr, "Couldn't recognise a URI or literal in string '%s'\n", input);
    exit(1);
  }

  printf("%016llX\n", rid);

  return 0;
}
示例#5
0
/*
 * Insert a typed literal taken from the parser context and return its RID.
 *
 * ctxt->attr is expected to hold the datatype URI; it is hashed and inserted
 * as a resource first.  When it is missing an error is logged and the
 * literal is hashed with the empty attribute instead.
 */
static fs_rid insert_typed(xmlctxt *ctxt)
{
  char *lexical = ctxt->resource;
  fs_rid datatype_rid = fs_c.empty;

  if (!ctxt->attr) {
    fs_error(LOG_ERR, "NULL type URI inserted");
  } else {
    datatype_rid = fs_hash_uri(ctxt->attr);
    insert_resource(ctxt, datatype_rid, fs_c.empty, ctxt->attr);
  }

  fs_rid literal_rid = fs_hash_literal(lexical, datatype_rid);
  insert_resource(ctxt, literal_rid, datatype_rid, lexical);

  return literal_rid;
}
示例#6
0
/*
 * Return `a` with its RID field populated.
 *
 * If the FS_V_RID valid bit is already set the value is returned untouched.
 * Otherwise the RID is computed as the literal hash of (a.lex, a.attr) and
 * the FS_V_RID bit is set on the returned copy.
 */
fs_value fs_value_fill_rid(fs_query *q, fs_value a)
{
    if (!(a.valid & fs_valid_bit(FS_V_RID))) {
        if (a.valid & fs_valid_bit(FS_V_TYPE_ERROR)) {
            /* NOTE(review): this store is overwritten by the hash below;
             * an early return may have been intended - confirm. */
            a.rid = FS_RID_NULL;
        }

        /* NOTE(review): `a` is passed by value and the result is ignored,
         * so unless fs_value_fill_lexical is a macro this cannot update
         * a.lex before it is hashed - confirm against its declaration. */
        fs_value_fill_lexical(q, a);

        a.rid = fs_hash_literal(a.lex, a.attr);
        a.valid |= fs_valid_bit(FS_V_RID);
    }

    return a;
}
示例#7
0
文件: filter.c 项目: dajobe/4store
/*
 * Cast value `v` to the datatype identified by `d.rid`.
 *
 * - literal -> URI-identified datatype: delegated to fn_cast_intl().
 * - URI -> xsd:string: delegated to fn_cast_intl(), then the RID of the
 *   result is replaced by the literal hash of v's lexical form typed as
 *   xsd:string.
 * - anything else: an FS_ERROR_INVALID_TYPE error value.
 */
fs_value fn_cast(fs_query *q, fs_value v, fs_value d)
{
#if 0
printf("CAST ");
fs_value_print(v);
printf(" -> ");
fs_value_print(d);
printf("\n");
#endif
    if (FS_IS_URI(d.rid) && FS_IS_LITERAL(v.rid)) {
        return fn_cast_intl(q, v, d.rid);
    } else if (d.rid == fs_c.xsd_string && FS_IS_URI(v.rid)) {
        fs_value as_string = fn_cast_intl(q, v, d.rid);

        as_string.rid = fs_hash_literal(v.lex, d.rid);

        return as_string;
    }

    return fs_value_error(FS_ERROR_INVALID_TYPE, "cast on URI/bNode");
}
示例#8
0
文件: update.c 项目: CloCkWeRX/4store
/*
 * Fill `res` (lex and attr) from a rasqal literal and, when the literal
 * carries a datatype or language tag, import that attribute as a resource
 * of its own through fsp_res_import().
 *
 *   l == NULL         -> lex "(null)", attr FS_RID_NULL
 *   variable          -> lex NULL,     attr FS_RID_GONE
 *   URI               -> lex URI text, attr FS_RID_NULL
 *   any other literal -> lex l->string, attr = hash of the datatype URI,
 *                        hash of the upper-cased language tag, or 0
 *
 * res->lex points into the rasqal literal (or at a string constant); it is
 * not copied.  res->rid is not written here.  `row` is unused.
 */
void fs_resource_from_rasqal_literal(struct update_context *uctxt, rasqal_literal *l, fs_resource *res, int row)
{
    if (!l) {
        res->lex = "(null)";
        res->attr = FS_RID_NULL;

        return;
    }
    rasqal_literal_type type = l->type;
    if (type == RASQAL_LITERAL_VARIABLE) {
        /* right now you can't introduce new literals in INSERT, so it doesn't
         * matter */
        res->lex = NULL;
        res->attr = FS_RID_GONE;
    } else if (type == RASQAL_LITERAL_URI) {
        res->lex = (char *)raptor_uri_as_string(l->value.uri);
        res->attr = FS_RID_NULL;
    } else {
        res->lex = (char *)l->string;
        res->attr = 0;
        fs_resource ares;
        char *lang = NULL; /* upper-cased copy of the language tag, owned here */
        ares.lex = NULL;
        if (l->datatype) {
            res->attr = fs_hash_uri((char *)raptor_uri_as_string(l->datatype));
            ares.rid = res->attr;
            ares.lex = (char *)raptor_uri_as_string(l->datatype);
            ares.attr = FS_RID_NULL;
        } else if (l->language) {
            /* lang tags are normalised to upper case internally; hashing the
             * raw tag would give an attr RID that differs from the one
             * fs_hash_rasqal_literal() uses for the same literal */
            lang = g_ascii_strup((char *)l->language, -1);
            res->attr = fs_hash_literal(lang, 0);
            ares.rid = res->attr;
            ares.lex = lang;
            ares.attr = 0;
        }
        /* insert attribute resource if there is one */
        if (ares.lex) {
            fsp_res_import(uctxt->link, FS_RID_SEGMENT(ares.rid, uctxt->segments), 1, &ares);
        }
        /* NOTE(review): assumes fsp_res_import() does not keep ares.lex
         * after it returns (ares itself is a stack local) - confirm. */
        g_free(lang);
    }
}
示例#9
0
文件: update.c 项目: CloCkWeRX/4store
/*
 * Compute the RID for a rasqal literal.
 *
 *   NULL literal  -> FS_RID_NULL
 *   variable      -> value bound in uc->q->bb[0] at `row`, or FS_RID_NULL
 *                    (with an error logged) when there is no query
 *   URI           -> URI hash
 *   string-like   -> literal hash with the datatype URI hash or the hash of
 *                    the upper-cased language tag as attribute (0 if neither)
 *   blank node    -> fs_bnode_id() of the node label
 *   anything else -> error logged, FS_RID_NULL
 */
fs_rid fs_hash_rasqal_literal(struct update_context *uc, rasqal_literal *l, int row)
{
    if (!l) {
        return FS_RID_NULL;
    }

    if (l->type == RASQAL_LITERAL_VARIABLE) {
        if (!uc->q) {
            fs_error(LOG_ERR, "no variables bound");

            return FS_RID_NULL;
        }

        return fs_binding_get_val(uc->q->bb[0], l->value.variable, row, NULL);
    }

    rasqal_literal_type type = rasqal_literal_get_rdf_term_type(l);
    switch (type) {
    case RASQAL_LITERAL_URI: {
        char *uri_text = (char *)raptor_uri_as_string(l->value.uri);

        return fs_hash_uri(uri_text);
    }

    case RASQAL_LITERAL_UNKNOWN:
    case RASQAL_LITERAL_STRING:
    case RASQAL_LITERAL_XSD_STRING: {
        fs_rid attr_rid = 0;

        if (l->datatype) {
            attr_rid = fs_hash_uri((char *)raptor_uri_as_string(l->datatype));
        } else if (l->language) {
            /* lang tags are normalised to upper case internally */
            char *upper_tag = g_ascii_strup((char *)l->language, -1);

            attr_rid = fs_hash_literal(upper_tag, 0);
            g_free(upper_tag);
        }

        return fs_hash_literal((char *)rasqal_literal_as_string(l), attr_rid);
    }

    case RASQAL_LITERAL_BLANK: {
        raptor_term_blank_value label;

        label.string = (unsigned char *)rasqal_literal_as_string(l);
        label.string_len = strlen((char *)label.string);

        return fs_bnode_id(uc->link, label);
    }

    /* none of these has an RDF term form that can be hashed here */
    case RASQAL_LITERAL_VARIABLE:
    case RASQAL_LITERAL_QNAME:
    case RASQAL_LITERAL_PATTERN:
    case RASQAL_LITERAL_BOOLEAN:
    case RASQAL_LITERAL_INTEGER:
    case RASQAL_LITERAL_INTEGER_SUBTYPE:
    case RASQAL_LITERAL_DECIMAL:
    case RASQAL_LITERAL_FLOAT:
    case RASQAL_LITERAL_DOUBLE:
    case RASQAL_LITERAL_DATETIME:
    case RASQAL_LITERAL_UDT:
#if RASQAL_VERSION >= 929
    case RASQAL_LITERAL_DATE:
#endif
        break;
    }

    fs_error(LOG_ERR, "bad rasqal literal (type %d)", type);

    return FS_RID_NULL;
}
示例#10
0
/*
 * Raptor statement callback: hash one parsed statement into a quad and
 * record the hash -> string mapping for each of its four terms.
 *
 * user_data is the rdf_parser_internal for the current parse.  For every
 * statement this:
 *   1. picks the graph (from the statement in TriG mode, otherwise the
 *      parser's configured model),
 *   2. hashes subject, predicate and object (blank nodes are rewritten to
 *      "bnode:b<label><bnode_ts>" and hashed as URIs),
 *   3. appends the quad to the per-segment array chosen by s_rid % SEGMENTS,
 *   4. stores each term's string form in the Kyoto Cabinet hash store
 *      chosen by rid % HASHES_NUM, keyed by 16 bytes of zero-padded hex,
 *   5. logs throughput every STAT_BATCH statements.
 *
 * Strings built here (blank-node labels, the serialised literal) are freed
 * before returning; kcdbset() copies the records it is given.
 */
static void rdf_parser_statement_handler(void* user_data, const raptor_statement* st) {
   raptor_term *g = st->graph;
   raptor_term *s = st->subject;
   raptor_term *p = st->predicate;
   raptor_term *o = st->object;

   rdf_parser_internal *parser_obj = (rdf_parser_internal *) user_data;

   if (parser_obj->counter == 0)
       parser_obj->partial_parse_time = g_timer_new();

   parser_obj->counter++;

   /* init index logic */
   unsigned char *gc = NULL;
   fs_rid g_rid;
   if (parser_obj->trig) {
       /* NOTE(review): g is dereferenced unchecked; confirm the parser never
        * delivers a statement without a graph term in TriG mode. */
       gc = raptor_uri_as_string(g->value.uri);
       g_rid = fs_hash_uri((const char *)gc);
   } else {
       g_rid = parser_obj->g_rid;
       gc = parser_obj->model;
   }

   /* Strings allocated in this call; released once they have been stored. */
   unsigned char *sc_owned = NULL;   /* g_strdup_printf() -> g_free() */
   unsigned char *oc_glib = NULL;    /* g_strdup_printf() -> g_free() */
   unsigned char *oc_raptor = NULL;  /* raptor_term_to_string() -> raptor_free_memory() */

   unsigned char *sc = NULL;
   if (s->type == RAPTOR_TERM_TYPE_URI) {
        sc = raptor_uri_as_string(s->value.uri);
   } else {
        /* NOTE(review): "+5" presumably skips a 5-character generated-id
         * prefix on the blank node label - confirm. */
        sc_owned = (unsigned char *) g_strdup_printf("bnode:b%s%s",s->value.blank.string+5,parser_obj->bnode_ts);
        sc = sc_owned;
   }

   unsigned char *pc = raptor_uri_as_string(p->value.uri);
   unsigned char *oc = NULL;
   fs_rid s_rid = fs_hash_uri((const char *) sc);
   fs_rid p_rid = fs_hash_uri((const char *) pc);
   fs_rid o_rid = 0x0;
   if (o->type == RAPTOR_TERM_TYPE_URI) {
       oc = raptor_uri_as_string(o->value.uri);
       o_rid = fs_hash_uri((const char *) oc);
   } else if (o->type == RAPTOR_TERM_TYPE_LITERAL) {
        const unsigned char *lexical = o->value.literal.string;
        if (o->value.literal.datatype) {
            unsigned char *o_datatype = raptor_uri_as_string(o->value.literal.datatype);
            o_rid = fs_hash_literal((const char *) lexical,fs_hash_uri((const char *) o_datatype));
        } else if (o->value.literal.language != NULL) {
            /* NOTE(review): the language tag is hashed with fs_hash_uri()
             * here - confirm this matches how readers of the store hash
             * language attributes. */
            unsigned char *o_lang = o->value.literal.language;
            o_rid = fs_hash_literal((const char *) lexical, fs_hash_uri((const char *) o_lang));
        } else {
            o_rid = fs_hash_literal((const char *) lexical, FS_RID_NULL);
        }
        /* the stored string is the full serialised term, not just the lexical form */
        oc_raptor = raptor_term_to_string(o);
        oc = oc_raptor;
   } else if (o->type == RAPTOR_TERM_TYPE_BLANK) {
        oc_glib = (unsigned char *) g_strdup_printf("bnode:b%s%s",o->value.blank.string+5,parser_obj->bnode_ts);
        oc = oc_glib;
        o_rid = fs_hash_uri((const char *) oc);
   }

   int seg_id = s_rid % SEGMENTS;
   fs_rid *quad = rdf_parser_new_quad(g_rid,s_rid,p_rid,o_rid);
   g_ptr_array_add(parser_obj->quads[seg_id],quad);

   /* saves hashes into disk hash TODO looks for optimistions */
   rdf_kb *kb = parser_obj->kb;
   fs_rid hashes[4] = {g_rid,s_rid,p_rid,o_rid};

   char tmp_rid[17];
   unsigned char *strings[4] = {gc,sc,pc,oc};
   for (int i = 0; i < 4; i++) {
       /* A term with no string form (unknown object type, failed
        * serialisation, unset model) cannot be stored; strlen(NULL) would
        * crash, so skip it. */
       if (strings[i] == NULL)
           continue;

       /* The key is always 16 bytes: the hex digits followed by NUL padding. */
       memset(tmp_rid, 0, sizeof tmp_rid);
       snprintf(tmp_rid, sizeof tmp_rid, "%llx", hashes[i]);
       int assigned_hash = hashes[i] % HASHES_NUM;
       kcdbset(kb->hash_stores[assigned_hash],(const char *) tmp_rid, 16 ,(const char *) strings[i], strlen((const char *) strings[i]));
   }
   /* end of saving hashes into disk */

   /* Release the strings built for this statement. */
   g_free(sc_owned);
   g_free(oc_glib);
   if (oc_raptor != NULL)
       raptor_free_memory(oc_raptor);

   if (!(parser_obj->counter % STAT_BATCH)) {
        double kt = parser_obj->counter/1e3;
        log_debug("parsing progress %.0lf kT %.2lf kT/s %.2lf kT/s",kt,kt/g_timer_elapsed(parser_obj->global_parse_time,NULL), 
               (STAT_BATCH/1e3)/g_timer_elapsed(parser_obj->partial_parse_time,NULL));
        g_timer_start(parser_obj->partial_parse_time);
   }
}
示例#11
0
文件: import.c 项目: nakao/4store
/*
 * Raptor statement callback for import pass 1.
 *
 * user_data is the fs_parse_stuff for the current import.  For each
 * statement this hashes subject, predicate and object (and the object's
 * language tag or datatype, if any), buffers every resource through
 * buffer_res(), appends the quad {model, s, p, o} to data->quad_fd, and
 * calls process_quads() once FS_CHUNK_SIZE triples have accumulated.
 *
 * Language tags are upper-cased IN PLACE in the statement before hashing.
 * A failed write is retried every 5 seconds while errno is EAGAIN, EINTR or
 * ENOSPC; any other write error is logged and the quad is dropped.
 */
static void store_stmt(void *user_data, const raptor_statement * statement)
{
    fs_parse_stuff *data = (fs_parse_stuff *) user_data;
    char *subj = (char *) raptor_uri_as_string((raptor_uri *)
                                               statement->subject);
    char *pred;
    char *obj;
    fs_rid m, s, p, o;
    char tmpp[512];

    m = data->model_hash;

    if (statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
        s = fs_bnode_id(data->link, statement->subject);
        subj = (char *) statement->subject;
    } else {
        s = fs_hash_uri(subj);
    }

    if (statement->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
        /* ordinal predicates carry an int, rendered as a membership URI */
        sprintf(tmpp, MEMBER_PREFIX "%d", *((int *)statement->predicate));
        pred = tmpp;
    } else {
        pred = (char *) raptor_uri_as_string((raptor_uri *)
                                               statement->predicate);
    }
    p = fs_hash_uri(pred);

    fs_rid attr = fs_c.empty;
    if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL ||
        statement->object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) {
        obj = (char *) statement->object;
        if (statement->object_literal_language) {
            char *langtag = (char *)statement->object_literal_language;
            for (char *pos = langtag; *pos; pos++) {
                /* <ctype.h> functions are undefined for negative values, so
                 * go through unsigned char */
                unsigned char c = (unsigned char)*pos;
                if (islower(c)) {
                    *pos = (char)toupper(c);
                }
            }
            attr = fs_hash_literal(langtag, 0);
            buffer_res(data->link, data->segments, attr, langtag, fs_c.empty, data->dryrun);
        } else if (raptor_uri_as_string(statement->object_literal_datatype)) {
            char *dt = (char *)raptor_uri_as_string(statement->object_literal_datatype);

            attr = fs_hash_uri(dt);
            buffer_res(data->link, data->segments, attr, dt, FS_RID_NULL, data->dryrun);
        }
        o = fs_hash_literal(obj, attr);
    } else if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
        o = fs_bnode_id(data->link, statement->object);
        obj = (char *) statement->object;
    } else {
        obj = (char *) raptor_uri_as_string((raptor_uri *) statement->
                                            object);
        attr = FS_RID_NULL;
        o = fs_hash_uri(obj);
    }

    buffer_res(data->link, data->segments, s, subj, FS_RID_NULL, data->dryrun);
    buffer_res(data->link, data->segments, p, pred, FS_RID_NULL, data->dryrun);
    buffer_res(data->link, data->segments, o, obj, attr, data->dryrun);

    fs_rid tbuf[4] = { m, s, p, o };
    while (write(data->quad_fd, tbuf, sizeof(tbuf)) == -1) {
        /* capture errno first: logging may overwrite it before the retry
         * decision is made */
        const int write_errno = errno;

        fs_error(LOG_ERR, "failed to buffer quad to fd %d (0x%x): %s", data->quad_fd, data->quad_fd, strerror(write_errno));
        if (write_errno != EAGAIN && write_errno != EINTR && write_errno != ENOSPC) {
            break;
        }
        sleep(5);
    }
    if (data->verbosity > 2) {
        fprintf(stderr, "%016llx %016llx %016llx %016llx\n", m, s, p, o);
    }

    data->count_trip++;
    total_triples_parsed++;
    if (data->verbosity && total_triples_parsed % 10000 == 0) {
        printf("Pass 1, processed %d triples\r", total_triples_parsed);
        fflush(stdout);
    }
    if (total_triples_parsed == FS_CHUNK_SIZE) {
        /* chunk full: hand the buffered quads on and restart the counter */
        if (data->verbosity) printf("Pass 1, processed %d triples (%d)\n", FS_CHUNK_SIZE, data->count_trip);
        *(data->ext_count) += process_quads(data);
        data->last_count = data->count_trip;
        total_triples_parsed = 0;
        gettimeofday(&then_last, 0);
    }
}