/**
 * Replace every blank-node RID in a vector with an ID obtained from
 * fs_bnode_id().
 *
 * Each entry for which FS_IS_BNODE() holds and which is not FS_RID_NULL is
 * formatted as the label "f_<16 hex digits>"; that label is handed to
 * fs_bnode_id() and the RID it returns overwrites the entry in place.
 * All other entries are left untouched.
 *
 * @param uc  update context; only uc->link is used here
 * @param r   vector of RIDs, modified in place
 */
static void map_bnodes(struct update_context *uc, fs_rid_vector *r)
{
    for (int i = 0; i < r->length; i++) {
        if (!FS_IS_BNODE(r->data[i]) || r->data[i] == FS_RID_NULL) {
            continue;
        }

        /* "f_" + 16 hex digits + NUL needs 19 bytes; bound the write anyway */
        char label[32];
        snprintf(label, sizeof(label), "f_%016llx",
                 (unsigned long long)r->data[i]);

        raptor_term_blank_value bnode;
        bnode.string = (unsigned char *)label;
        /* report the real label length, as fs_hash_rasqal_literal() does */
        bnode.string_len = strlen(label);

        r->data[i] = fs_bnode_id(uc->link, bnode);
    }
}
/**
 * Compute the RID corresponding to a rasqal literal.
 *
 * - NULL literal: returns FS_RID_NULL.
 * - Variable: looked up with fs_binding_get_val() in uc->q->bb[0] for the
 *   given row; if the context has no query, an error is logged and
 *   FS_RID_NULL is returned.
 * - URI: fs_hash_uri() of the URI string.
 * - Plain / string literal: fs_hash_literal() of the lexical form, with an
 *   attribute that is the hashed datatype URI, else the hashed upper-cased
 *   language tag, else 0.
 * - Blank node: fs_bnode_id() of the literal's string.
 * - Any other RDF term type: an error is logged and FS_RID_NULL is returned.
 *
 * @param uc   update context (uc->q and uc->link are used)
 * @param l    literal to hash, may be NULL
 * @param row  binding row used when the literal is a variable
 * @return the RID, or FS_RID_NULL on failure
 */
fs_rid fs_hash_rasqal_literal(struct update_context *uc, rasqal_literal *l, int row)
{
    if (l == NULL) {
        return FS_RID_NULL;
    }

    if (l->type == RASQAL_LITERAL_VARIABLE) {
        if (!uc->q) {
            fs_error(LOG_ERR, "no variables bound");
            return FS_RID_NULL;
        }
        return fs_binding_get_val(uc->q->bb[0], l->value.variable, row, NULL);
    }

    rasqal_literal_type type = rasqal_literal_get_rdf_term_type(l);

    /* no default label, so every enumerator stays listed explicitly */
    switch (type) {
    case RASQAL_LITERAL_URI: {
        char *uri_text = (char *)raptor_uri_as_string(l->value.uri);
        return fs_hash_uri(uri_text);
    }

    case RASQAL_LITERAL_UNKNOWN:
    case RASQAL_LITERAL_STRING:
    case RASQAL_LITERAL_XSD_STRING: {
        fs_rid attr_rid = 0;

        /* the datatype takes precedence over the language tag */
        if (l->datatype) {
            char *dt_text = (char *)raptor_uri_as_string(l->datatype);
            attr_rid = fs_hash_uri(dt_text);
        } else if (l->language) {
            /* lang tags are normalised to upper case internally */
            char *upper_lang = g_ascii_strup((char *)l->language, -1);
            attr_rid = fs_hash_literal(upper_lang, 0);
            g_free(upper_lang);
        }

        char *lexical = (char *)rasqal_literal_as_string(l);
        return fs_hash_literal(lexical, attr_rid);
    }

    case RASQAL_LITERAL_BLANK: {
        raptor_term_blank_value blank;

        blank.string = (unsigned char *)rasqal_literal_as_string(l);
        blank.string_len = strlen((char *)blank.string);
        return fs_bnode_id(uc->link, blank);
    }

    case RASQAL_LITERAL_VARIABLE:
    case RASQAL_LITERAL_QNAME:
    case RASQAL_LITERAL_PATTERN:
    case RASQAL_LITERAL_BOOLEAN:
    case RASQAL_LITERAL_INTEGER:
    case RASQAL_LITERAL_INTEGER_SUBTYPE:
    case RASQAL_LITERAL_DECIMAL:
    case RASQAL_LITERAL_FLOAT:
    case RASQAL_LITERAL_DOUBLE:
    case RASQAL_LITERAL_DATETIME:
    case RASQAL_LITERAL_UDT:
#if RASQAL_VERSION >= 929
    case RASQAL_LITERAL_DATE:
#endif
        /* not hashable here: fall out to the error report below */
        break;
    }

    fs_error(LOG_ERR, "bad rasqal literal (type %d)", type);

    return FS_RID_NULL;
}
static void store_stmt(void *user_data, const raptor_statement * statement) { fs_parse_stuff *data = (fs_parse_stuff *) user_data; char *subj = (char *) raptor_uri_as_string((raptor_uri *) statement->subject); char *pred; char *obj; fs_rid m, s, p, o; char tmpp[512]; m = data->model_hash; if (statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) { s = fs_bnode_id(data->link, statement->subject); subj = (char *) statement->subject; } else { s = fs_hash_uri(subj); } if (statement->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) { sprintf(tmpp, MEMBER_PREFIX "%d", *((int *)statement->predicate)); pred = tmpp; } else { pred = (char *) raptor_uri_as_string((raptor_uri *) statement->predicate); } p = fs_hash_uri(pred); fs_rid attr = fs_c.empty; if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL || statement->object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) { obj = (char *) statement->object; if (statement->object_literal_language) { char *langtag = (char *)statement->object_literal_language; for (char *pos = langtag; *pos; pos++) { if (islower(*pos)) { *pos = toupper(*pos); } } attr = fs_hash_literal(langtag, 0); buffer_res(data->link, data->segments, attr, langtag, fs_c.empty, data->dryrun); } else if (raptor_uri_as_string(statement->object_literal_datatype)) { char *dt = (char *)raptor_uri_as_string(statement->object_literal_datatype); attr = fs_hash_uri(dt); buffer_res(data->link, data->segments, attr, dt, FS_RID_NULL, data->dryrun); } o = fs_hash_literal(obj, attr); } else if (statement->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) { o = fs_bnode_id(data->link, statement->object); obj = (char *) statement->object; } else { obj = (char *) raptor_uri_as_string((raptor_uri *) statement-> object); attr = FS_RID_NULL; o = fs_hash_uri(obj); } buffer_res(data->link, data->segments, s, subj, FS_RID_NULL, data->dryrun); buffer_res(data->link, data->segments, p, pred, FS_RID_NULL, data->dryrun); buffer_res(data->link, data->segments, o, obj, attr, 
data->dryrun); fs_rid tbuf[4] = { m, s, p, o }; retry_write: if (write(data->quad_fd, tbuf, sizeof(tbuf)) == -1) { fs_error(LOG_ERR, "failed to buffer quad to fd %d (0x%x): %s", data->quad_fd, data->quad_fd, strerror(errno)); if (errno == EAGAIN || errno == EINTR || errno == ENOSPC) { sleep(5); goto retry_write; } } if (data->verbosity > 2) { fprintf(stderr, "%016llx %016llx %016llx %016llx\n", m, s, p, o); } data->count_trip++; total_triples_parsed++; if (data->verbosity && total_triples_parsed % 10000 == 0) { printf("Pass 1, processed %d triples\r", total_triples_parsed); fflush(stdout); } if (total_triples_parsed == FS_CHUNK_SIZE) { if (data->verbosity) printf("Pass 1, processed %d triples (%d)\n", FS_CHUNK_SIZE, data->count_trip); *(data->ext_count) += process_quads(data); data->last_count = data->count_trip; total_triples_parsed = 0; gettimeofday(&then_last, 0); } }