END_TEST START_TEST (check_fs_rid_vector_uniq) { fs_rid_vector *v = fs_rid_vector_new(0); fail_unless(v != NULL, "fs_rid_vector is NULL"); int i=0; for (i=100; i < 500; i++) { fs_rid_vector_append(v, i); fs_rid_vector_append(v, i); } fs_rid_vector_append(v,FS_RID_NULL); fail_if(fs_rid_vector_length(v) != (((500 - 100)*2) + 1), "fs_rid_vector_length failed"); fs_rid_vector_uniq(v, 0); //Not remove nulls fail_if(fs_rid_vector_length(v) != ((500 - 100) + 1), "fs_rid_vector_length failed"); for (i=1; i < fs_rid_vector_length(v) - 1; i++) { fail_if(v->data[i] == v->data[i-1], "unique values failed [%d %d]",v->data[i],v->data[i-1]); } fail_if(v->data[fs_rid_vector_length(v)-1] != FS_RID_NULL, "FS_RID_NULL isn't there."); v->data[123] = FS_RID_NULL; fs_rid_vector_uniq(v, 1); //remove nulls fail_if(fs_rid_vector_length(v) != ((500 - 100) -1), "fs_rid_vector_length failed"); fail_if(v->data[fs_rid_vector_length(v)-1] == FS_RID_NULL, "FS_RID_NULL should not be there %llx", v->data[fs_rid_vector_length(v)-1]); fail_if(v->data[123] == FS_RID_NULL, "FS_RID_NULL should not be there %llx", v->data[fs_rid_vector_length(v)-1]); fs_rid_vector_free(v); }
/*
 * Hash the four slots of a rasqal triple template for the given result row
 * and queue the resulting quad in vec[0..3] (model, subject, predicate,
 * object) for deletion.
 *
 * Returns 1 without queueing anything when an explicit origin, the subject,
 * the predicate or the object hashes to FS_RID_NULL; returns 0 otherwise.
 * Once more than 999 quads are queued they are sent with
 * fsp_delete_quads_all() and the four vectors are emptied.
 */
static int delete_rasqal_triple(struct update_context *ct, fs_rid_vector *vec[], rasqal_triple *triple, int row)
{
    fs_rid quad[4];

    /* graph slot: explicit origin, else the operation's graph, else wildcard */
    if (triple->origin) {
        quad[0] = fs_hash_rasqal_literal(ct, triple->origin, row);
        if (quad[0] == FS_RID_NULL) {
            return 1;
        }
    } else if (ct->op->graph_uri) {
        quad[0] = fs_hash_uri((char *)raptor_uri_as_string(ct->op->graph_uri));
    } else {
        /* the model may be a wildcard in the absence of GRAPH, WITH etc. */
        quad[0] = FS_RID_NULL;
    }

    quad[1] = fs_hash_rasqal_literal(ct, triple->subject, row);
    if (quad[1] == FS_RID_NULL) {
        return 1;
    }
    quad[2] = fs_hash_rasqal_literal(ct, triple->predicate, row);
    if (quad[2] == FS_RID_NULL) {
        return 1;
    }
    quad[3] = fs_hash_rasqal_literal(ct, triple->object, row);
    if (quad[3] == FS_RID_NULL) {
        return 1;
    }

    /* subject, predicate and object are all bound: queue the quad */
    for (int slot = 0; slot < 4; slot++) {
        fs_rid_vector_append(vec[slot], quad[slot]);
    }

    if (fs_rid_vector_length(vec[0]) <= 999) {
        return 0;
    }

    /* batch is full: flush it and start over */
    fsp_delete_quads_all(ct->link, vec);
    for (int slot = 0; slot < 4; slot++) {
        fs_rid_vector_truncate(vec[slot], 0);
    }

    return 0;
}
/*
 * Append the requested slots of one quad to the result columns.
 *
 * quad is ordered model, subject, predicate, object. For every FS_BIND_*
 * slot flag set in tobind, the matching quad entry is appended to the next
 * unused vector in ret, so ret holds one vector per requested slot, in
 * that same order.
 */
static void bind_results(const fs_rid quad[4], int tobind, fs_rid_vector **ret)
{
    const int slot_flag[4] = {
        FS_BIND_MODEL, FS_BIND_SUBJECT, FS_BIND_PREDICATE, FS_BIND_OBJECT
    };
    int out = 0;

    for (int slot = 0; slot < 4; slot++) {
        if (tobind & slot_flag[slot]) {
            fs_rid_vector_append(ret[out], quad[slot]);
            out++;
        }
    }
}
/*
 * Append to v every entry of v2 that is neither FS_RID_NULL nor a literal
 * (as decided by FS_IS_LITERAL). Does nothing when v2 is NULL.
 */
void fs_rid_vector_append_vector_no_nulls_lit(fs_rid_vector *v, fs_rid_vector *v2)
{
    if (v2 == NULL) {
        return;
    }

    for (int idx = 0; idx < v2->length; idx++) {
        const fs_rid rid = v2->data[idx];

        if (rid == FS_RID_NULL || FS_IS_LITERAL(rid)) {
            continue;
        }
        fs_rid_vector_append(v, rid);
    }
}
/*
 * UNION b onto a: afterwards each column of a holds its own values
 * followed by the values of the same column of b.
 *
 * A column bound in only one of the tables is marked bound in the other
 * and padded there with FS_RID_NULL up to that table's row count, so both
 * sides line up before b's values are appended. Column 0 of a is emptied.
 * Note that b is modified as well (bound flags and padding). q is not used.
 */
void fs_binding_union(fs_query *q, fs_binding *a, fs_binding *b)
{
    const int rows_a = fs_binding_length(a);
    const int rows_b = fs_binding_length(b);

    a[0].vals->length = 0;

    for (int col = 1; a[col].name && b[col].name; col++) {
        fs_binding *left = &a[col];
        fs_binding *right = &b[col];

        if (right->bound && !left->bound) {
            /* only b binds this variable: pad a with NULLs */
            left->bound = 1;
            while (left->vals->length < rows_a) {
                fs_rid_vector_append(left->vals, FS_RID_NULL);
            }
        } else if (left->bound && !right->bound) {
            /* only a binds this variable: pad b with NULLs */
            right->bound = 1;
            while (right->vals->length < rows_b) {
                fs_rid_vector_append(right->vals, FS_RID_NULL);
            }
        }

        fs_rid_vector_append_vector(left->vals, right->vals);
    }
}
/*
 * Sort a binding table indirectly.
 *
 * Every column shorter than the table is first padded with FS_RID_NULL.
 * Column 0 (_ord) is then refilled with the row numbers 0..length-1 and
 * that vector is what actually gets sorted, using qsort_r_cmp with the
 * table as context; the other columns are left in place.
 * If no column has its sort flag set a warning is logged and the function
 * returns after padding.
 */
void fs_binding_sort(fs_binding *b)
{
    const int length = fs_binding_length(b);
    int sort_columns = 0;

    for (fs_binding *col = b; col->name; col++) {
        if (col->sort) {
            sort_columns++;
        }
        /* pad short columns so every row index is valid */
        for (int row = col->vals->length; row < length; row++) {
            fs_rid_vector_append(col->vals, FS_RID_NULL);
        }
    }

    if (sort_columns == 0) {
        fs_error(LOG_WARNING, "fs_binding_sort() called with no sort "
                              "columns set, ignoring");

        return;
    }

    /* fill out the _ord column with the integers 0 .. length-1 */
    fs_rid_vector *ord = b[0].vals;
    ord->length = 0;
    for (int row = 0; row < length; row++) {
        fs_rid_vector_append(ord, row);
    }

    if (length <= 1) {
        /* nothing to reorder */
        return;
    }

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif
    /* ctxt could include other stuff for optimisations */
    struct sort_context ctxt = { b };
    fs_qsort_r(b[0].vals->data, length, sizeof(fs_rid), qsort_r_cmp, &ctxt);
#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("sort took %f seconds\n", now - then);
#endif
}
/*
 * Collect, in entry order, the values of every metadata entry whose key
 * equals prop, each converted with atoll(). Always returns a newly
 * created vector, which is empty when nothing matches.
 */
fs_rid_vector *fs_metadata_get_int_vector(fs_metadata *m, const char *prop)
{
    fs_rid_vector *values = fs_rid_vector_new(0);
    int idx = 0;

    while (idx < m->length) {
        if (strcmp(m->entries[idx].key, prop) == 0) {
            fs_rid_vector_append(values, atoll(m->entries[idx].val));
        }
        idx++;
    }

    return values;
}
/*
 * Append every non-NULL rid stored in set s to vector v, walking each of
 * the FS_RID_SET_ENTRIES buckets and the chain hanging off it.
 * Does nothing when s is NULL.
 */
void fs_rid_vector_append_set(fs_rid_vector *v, fs_rid_set *s)
{
    if (s == NULL) {
        return;
    }

    for (int bucket = 0; bucket < FS_RID_SET_ENTRIES; bucket++) {
        struct rid_entry *node = &(s->entry[bucket]);

        while (node) {
            if (node->val != FS_RID_NULL) {
                fs_rid_vector_append(v, node->val);
            }
            node = node->next;
        }
    }
}
/*
 * Build the intersection of count rid vectors.
 *
 * Every entry of rv[0] is tested with inter_sub(count, 1, rv, value) and
 * kept when that returns true. The result is always a newly created
 * vector; it is empty when count is less than 1 or the first vector is
 * missing, instead of reading rv[0] out of bounds.
 */
fs_rid_vector *fs_rid_vector_intersect(int count, const fs_rid_vector *rv[])
{
    fs_rid_vector *ret = fs_rid_vector_new(0);

    /* nothing to intersect: rv[0] must not be touched */
    if (count < 1 || !rv || !rv[0]) {
        return ret;
    }

    for (int i=0; i<rv[0]->length; i++) {
        if (inter_sub(count, 1, rv, rv[0]->data[i])) {
            fs_rid_vector_append(ret, rv[0]->data[i]);
        }
    }

    return ret;
}
END_TEST START_TEST (check_fs_rid_vector_sort) { fs_rid_vector *v = fs_rid_vector_new(0); fail_unless(v != NULL, "fs_rid_vector is NULL"); int i=0; for (i=100; i < 1e4; i++) { if (i % 2) { fs_rid_vector_append(v, i); fs_rid_vector_append(v, i+3); fs_rid_vector_append(v, i-10); } else { fs_rid_vector_append(v, i-1); fs_rid_vector_append(v, i+5); fs_rid_vector_append(v, i); } } fs_rid_vector_sort(v); fail_if(fs_rid_vector_length(v) != ((1e4 - 100) *3), "Length fail after sort"); for (i=100; i < 1e4; i++) { fail_if ( !fs_rid_vector_contains(v, i), "Contain failed after sort"); } for (i=0;i<fs_rid_vector_length(v) -1; i++) { fail_if (v->data[i] > v->data[i+1], "Sort does not match."); } fs_rid_vector_free(v); }
/*
 * Apply a sequence of filter expressions to binding table b and return
 * the surviving rows in a new table (a copy of b with its columns
 * refilled). b itself is not changed.
 *
 * With constr == NULL the plain copy is returned. While the expressions
 * are evaluated q->bt temporarily points at b; it is restored before
 * returning. Evaluation errors that carry a lexical message are prepended
 * to q->warnings.
 *
 * NOTE(review): the row is appended inside the per-constraint loop, so a
 * row is emitted once for every constraint whose EBV is true rather than
 * once when all of them hold - confirm that callers only ever pass a
 * single (conjoined) expression.
 */
fs_binding *fs_binding_apply_filters(fs_query *q, int block, fs_binding *b, raptor_sequence *constr)
{
    fs_binding *ret = fs_binding_copy(b);

    if (constr == NULL) {
        /* no constraints, so there is nothing to filter */
        return ret;
    }

    /* start from empty result columns */
    for (int col = 0; b[col].name; col++) {
        ret[col].vals->length = 0;
    }

    const int length = fs_binding_length(b);
    fs_binding *saved_bt = q->bt;
    q->bt = b;

    /* TODO should prefetch lexical vals here */
    /* expressions that have been optimised out will be replaced with NULL,
     * so we have to be careful here */
    for (int row = 0; row < length; row++) {
        for (int c = 0; c < raptor_sequence_size(constr); c++) {
            rasqal_expression *expr = raptor_sequence_get_at(constr, c);
            if (expr == NULL) {
                continue;
            }

            fs_value v = fs_expression_eval(q, row, block, expr);
#ifdef DEBUG_FILTER
            rasqal_expression_print(expr, stdout);
            printf(" -> ");
            fs_value_print(v);
            printf("\n");
#endif
            if ((v.valid & fs_valid_bit(FS_V_TYPE_ERROR)) && v.lex) {
                q->warnings = g_slist_prepend(q->warnings, v.lex);
            }

            fs_value result = fn_ebv(v);
            const int ebv_true =
                !(result.valid & fs_valid_bit(FS_V_TYPE_ERROR)) && result.in;

            if (ebv_true) {
                /* keep this row: copy every bound column */
                for (int col = 0; b[col].name; col++) {
                    if (b[col].bound) {
                        fs_rid_vector_append(ret[col].vals, b[col].vals->data[row]);
                    }
                }
            }
        }
    }

    q->bt = saved_bt;

    return ret;
}
/*
 * Remove consecutive duplicate rows from binding table bi, in place.
 *
 * The table is moved into a temporary with fs_binding_copy_and_clear() and
 * written back row by row, skipping any row that binding_row_compare()
 * reports as equal to the row before it. Tables with fewer than two rows
 * are left untouched.
 */
void fs_binding_uniq(fs_binding *bi)
{
    if (fs_binding_length(bi) < 2) {
        /* nothing to do; the code below assumes at least one row */
        return;
    }

    fs_binding *src = fs_binding_copy_and_clear(bi);
    bi[0].vals->length = 0;

#ifdef DEBUG_MERGE
    double then = fs_time();
    int outrow = 1;
#endif
    const int length = fs_binding_length(src);

    /* the first row is always kept; also set up the comparison columns */
    for (int col = 1; src[col].name; col++) {
        fs_rid_vector_append(bi[col].vals, table_value(src, col, 0));
        bi[col].bound = src[col].bound;
        src[col].sort = src[col].bound;
    }

    for (int row = 1; row < length; row++) {
        const int same_as_previous =
            binding_row_compare(NULL, src, src, row, row-1, length, length) == 0;

        if (!same_as_previous) {
            for (int col = 1; src[col].name; col++) {
                fs_rid_vector_append(bi[col].vals, table_value(src, col, row));
            }
#ifdef DEBUG_MERGE
            outrow++;
#endif
        }
    }

#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("uniq took %fs (%d->%d rows)\n", now-then, length, outrow);
    fs_binding_print(bi, stdout);
#endif
    fs_binding_free(src);
}
/*
 * Print a summary of an mhash file to out and sanity-check it.
 *
 * The header fields are printed, then every entry after the header is
 * read from mh->fd. Entries with a non-zero val are counted and, when
 * verbosity > 0, listed. Errors are reported on out when a model appears
 * in two adjacent used entries, when the header count differs from the
 * scanned count, or when sorting and uniq-ing the collected rids shrinks
 * the list (i.e. some model appears more than once).
 */
void fs_mhash_print(fs_mhash *mh, FILE *out, int verbosity)
{
    if (!mh) {
        fs_error(LOG_CRIT, "tried to print NULL mhash");

        return;
    }

    fs_mhash_entry e;
    fs_rid_vector *models = fs_rid_vector_new(0);
    fs_rid last_model = FS_RID_NULL;
    int entry = 0;
    int count = 0;

    fprintf(out, "mhash %s\n", mh->filename);
    fprintf(out, "  count: %d\n", mh->count);
    fprintf(out, "  size: %d\n", mh->size);
    fprintf(out, "\n");

    lseek(mh->fd, sizeof(struct mhash_header), SEEK_SET);
    while (read(mh->fd, &e, sizeof(e)) == sizeof(e)) {
        if (e.val) {
            count++;
            if (verbosity > 0) {
                fprintf(out, "%8d %016llx %8d\n", entry, e.rid, e.val);
            }
            fs_rid_vector_append(models, e.rid);
            if (e.rid == last_model) {
                fprintf(out, "ERROR: %s model %016llx appears multiple times\n",
                        mh->filename, e.rid);
            }
            last_model = e.rid;
        }
        entry++;
    }

    if (mh->count != count) {
        fprintf(out, "ERROR: %s header count %d != scanned count %d\n",
                mh->filename, mh->count, count);
    }

    /* duplicates anywhere in the file show up as a shorter uniq'd list */
    int oldlength = models->length;
    fs_rid_vector_sort(models);
    fs_rid_vector_uniq(models, 0);
    if (models->length != oldlength) {
        fprintf(out, "ERROR: %s some models appear > 1 time\n",
                mh->filename);
    }

    /* the scratch vector is only needed for the check above */
    fs_rid_vector_free(models);
}
/*
 * For every column of from that is not flagged as used, append the value
 * at the given row to the same column of to, count times. A row beyond
 * the end of a column contributes FS_RID_NULL.
 */
void fs_binding_copy_row_unused(fs_binding *from, int row, int count, fs_binding *to)
{
    for (int col = 0; from[col].name; col++) {
        if (from[col].used) {
            continue;
        }

        const fs_rid val = (row < from[col].vals->length)
                               ? from[col].vals->data[row]
                               : FS_RID_NULL;

        for (int copies = count; copies > 0; copies--) {
            fs_rid_vector_append(to[col].vals, val);
        }
    }
}
END_TEST START_TEST (check_fs_rid_vector_truncate) { fs_rid_vector *v = fs_rid_vector_new(0); fail_unless(v != NULL, "fs_rid_vector is NULL"); int i=0; for (i=100; i < 1e4; i++) { fs_rid_vector_append(v, i); } fs_rid_vector_truncate(v, 100); fail_if(fs_rid_vector_length(v) != 100); fail_if(v->data[fs_rid_vector_length(v)-1] != 199); fs_rid_vector_truncate(v, 1); fail_if(v->data[fs_rid_vector_length(v)-1] != 100); fs_rid_vector_free(v); }
/*
 * Add a value for variable var to binding table b.
 *
 * If fs_binding_get() already finds a column for var, val is appended to
 * it, the column is marked bound and the projected flag is OR-ed into
 * proj and need_val. Otherwise the first column without a name is claimed:
 * it gets a copy of the variable name and a new value vector (holding val
 * unless val is FS_RID_NULL, in which case the vector is empty and the
 * column stays unbound), and its index is stored in var->user_data.
 *
 * Returns the column, or NULL when all FS_BINDING_MAX_VARS columns are
 * already in use.
 */
fs_binding *fs_binding_add(fs_binding *b, rasqal_variable *var, fs_rid val, int projected)
{
#ifdef DEBUG_BINDING
    /* trace only the variable being debugged; the name comes from var */
    if (!strcmp(DEBUG_BINDING, (char *)var->name)) printf("@@ add("DEBUG_BINDING", %016llx, %d)\n", val, projected);
#endif
    fs_binding *bv = fs_binding_get(b, var);
    if (bv) {
        fs_rid_vector_append(bv->vals, val);
        bv->bound = 1;
        bv->proj |= projected;
        bv->need_val |= projected;

        return bv;
    }

    /* find the first free column */
    long i;
    for (i=0; i < FS_BINDING_MAX_VARS && b[i].name; i++);

    if (i == FS_BINDING_MAX_VARS) {
        fs_error(LOG_ERR, "variable limit (%d) exceeded",
                FS_BINDING_MAX_VARS);

        return NULL;
    }

    b[i].name = g_strdup((char *)var->name);
    if (b[i].vals) {
        /* a vector left behind in a free column is about to be overwritten */
        fs_error(LOG_WARNING, "loosing pointer to rid_vector");
    }
    if (val != FS_RID_NULL) {
        b[i].vals = fs_rid_vector_new_from_args(1, val);
        b[i].bound = 1;
    } else {
        b[i].vals = fs_rid_vector_new(0);
    }
    b[i].proj = projected;
    b[i].need_val = projected;
    /* remember the column index on the variable itself */
    var->user_data = (void *)i;

    return b+i;
}
END_TEST START_TEST (check_fs_rid_vector_grow) { fs_rid_vector *v = fs_rid_vector_new(0); fail_unless(v != NULL, "fs_rid_vector is NULL"); int i=0; for (i=100; i < 1e4; i++) { fs_rid_vector_append(v, i); } fail_if(fs_rid_vector_length(v) != (1e4 - 100)); fs_rid_vector_grow(v, 2e4); fail_if(fs_rid_vector_length(v) != 2e4); for (i=0;i<2e4;i++) { v->data[i] = i; } fail_if(v->size != 2e4); fs_rid_vector_free(v); }
/*
 * Append all entries of v2 to v. Does nothing when either is NULL.
 *
 * When v2 has more than four entries and v lacks room, v is grown in one
 * step (by at least 32 entries) and the data is copied with memcpy().
 * Otherwise, or when that allocation fails, the entries are appended one
 * at a time with fs_rid_vector_append(). If the bulk allocation fails v
 * is left untouched, so its old buffer is neither leaked nor replaced by
 * NULL.
 */
void fs_rid_vector_append_vector(fs_rid_vector *v, fs_rid_vector *v2)
{
    if (!v) return;
    if (!v2) return;

    if (v2->length > 4 && v->size - v->length < v2->length) {
        const int extra = v2->length > 32 ? v2->length : 32;
        fs_rid *grown = realloc(v->data, sizeof(fs_rid) * (v->size + extra));

        /* only commit the new buffer and size once realloc has succeeded */
        if (grown) {
            v->data = grown;
            v->size += extra;
        }
    }

    if (v->size - v->length >= v2->length) {
        memcpy(&v->data[v->length], v2->data, sizeof(fs_rid) * v2->length);
        v->length += v2->length;

        return;
    }

    /* slow path; the count is fixed up front so that appending a vector
     * to itself terminates instead of chasing its own growing length */
    const int count = v2->length;
    for (int j=0; j<count; j++) {
        fs_rid_vector_append(v, v2->data[j]);
    }
}
/*
 * Return a new vector holding the rid of every mhash entry whose val is
 * non-zero, in file order.
 *
 * Returns NULL (after logging) when mh is NULL. Unless mh->locked is set,
 * a shared flock() is held on mh->fd while the file is scanned. If seeking
 * to the first entry fails the error is logged and the vector is returned
 * empty rather than being filled from whatever offset the descriptor
 * happens to be at.
 */
fs_rid_vector *fs_mhash_get_keys(fs_mhash *mh)
{
    if (!mh) {
        fs_error(LOG_CRIT, "tried to get keys from NULL mhash");

        return NULL;
    }

    fs_rid_vector *v = fs_rid_vector_new(0);
    fs_mhash_entry e;

    if (!mh->locked) flock(mh->fd, LOCK_SH);
    if (lseek(mh->fd, sizeof(struct mhash_header), SEEK_SET) == -1) {
        fs_error(LOG_ERR, "seek error on mhash: %s", strerror(errno));
    } else {
        while (read(mh->fd, &e, sizeof(e)) == sizeof(e)) {
            if (e.val) fs_rid_vector_append(v, e.rid);
        }
    }
    if (!mh->locked) flock(mh->fd, LOCK_UN);

    return v;
}
/*
 * Read whitespace-separated hexadecimal rids from a text file.
 *
 * Each token (at most 20 characters are taken per read) is converted with
 * strtoull() in base 16 and appended to the result. Always returns a
 * newly created vector; it is empty when the file cannot be opened, in
 * which case the error is logged.
 */
static fs_rid_vector *rid_file(char *filename)
{
    fs_rid_vector *rids = fs_rid_vector_new(0);
    FILE *fp = fopen(filename, "r");

    if (fp == NULL) {
        fs_error(LOG_ERR, "could not open “%s”: %s", filename, strerror(errno));

        return rids;
    }

    for (;;) {
        char token[21];

        if (feof(fp) || ferror(fp)) {
            break;
        }
        if (fscanf(fp, "%20s", token) < 1) {
            break;
        }

        const fs_rid rid = strtoull(token, NULL, 16);
        fs_rid_vector_append(rids, rid);
    }
    fclose(fp);

    return rids;
}
int fs_clear(struct update_context *uc, char *graphuri) { fs_rid_vector *mvec = fs_rid_vector_new(0); fs_rid mrid; if (graphuri) { mrid = fs_hash_uri(graphuri); } else { graphuri = FS_DEFAULT_GRAPH; mrid = fs_c.default_graph; } fs_rid_vector_append(mvec, mrid); int errors = 0; if (fsp_delete_model_all(uc->link, mvec)) { errors++; add_message(uc, g_strdup_printf("Error while trying to delete %s", graphuri), 1); } else { add_message(uc, g_strdup_printf("Deleted <%s>", graphuri), 1); } fs_rid_vector_free(mvec); return errors; }
int fs_copy(struct update_context *uc, char *from, char *to) { fs_rid_vector *mvec = fs_rid_vector_new(0); fs_rid_vector *empty = fs_rid_vector_new(0); fs_rid fromrid, torid; if (from) { fromrid = fs_hash_uri(from); } else { from = FS_DEFAULT_GRAPH; fromrid = fs_c.default_graph; } if (to) { torid = fs_hash_uri(to); } else { to = FS_DEFAULT_GRAPH; torid = fs_c.default_graph; } if (fromrid == torid) { /*don't need to do anything */ fs_rid_vector_free(mvec); fs_rid_vector_free(empty); add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1); add_message(uc, "0 triples added, 0 removed", 0); return 0; } fs_rid_vector_append(mvec, fromrid); /* search for all the triples in from */ fs_rid_vector **results; fs_rid_vector *slot[4] = { mvec, empty, empty, empty }; /* see if there's any data in <from> */ fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT, slot, &results, -1, 1); if (!results || results[0]->length == 0) { if (results) { fs_rid_vector_free(results[0]); free(results); } fs_rid_vector_free(mvec); fs_rid_vector_free(empty); add_message(uc, g_strdup_printf("<%s> is empty, not copying", from), 1); return 1; } fs_rid_vector_free(results[0]); free(results); /* get the contents of <from> */ fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT, slot, &results, -1, -1); /* map old bnodes to new ones */ map_bnodes(uc, results[0]); map_bnodes(uc, results[1]); map_bnodes(uc, results[2]); /* delete <to> */ mvec->data[0] = torid; if (fsp_delete_model_all(uc->link, mvec)) { fs_rid_vector_free(mvec); fs_rid_vector_free(empty); add_message(uc, g_strdup_printf("Error while trying to delete %s", to), 1); return 1; } fs_rid_vector_free(mvec); fs_rid_vector_free(empty); /* insert <to> */ fs_resource tores; tores.lex = to; tores.attr= FS_RID_NULL; tores.rid = torid; fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores); insert_triples(uc, torid, results[0], results[1], 
results[2]); add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1); add_message(uc, g_strdup_printf("%d triples added, ?? removed", results[0]->length), 1); for (int i=0; i<3; i++) { fs_rid_vector_free(results[i]); } free(results); return 0; }
int fs_add(struct update_context *uc, char *from, char *to) { fs_rid_vector *mvec = fs_rid_vector_new(0); fs_rid_vector *empty = fs_rid_vector_new(0); fs_rid fromrid, torid; if (from) { fromrid = fs_hash_uri(from); } else { from = FS_DEFAULT_GRAPH; fromrid = fs_c.default_graph; } if (to) { torid = fs_hash_uri(to); } else { to = FS_DEFAULT_GRAPH; torid = fs_c.default_graph; } if (fromrid == torid) { /*don't need to do anything */ add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1); add_message(uc, "0 triples added, 0 removed", 0); return 0; } fs_rid_vector_append(mvec, fromrid); int errors = 0; /* search for all the triples in from */ fs_rid_vector **results; fs_rid_vector *slot[4] = { mvec, empty, empty, empty }; fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT, slot, &results, -1, -1); fs_rid_vector_free(mvec); fs_rid_vector_free(empty); if (!results || results[0]->length == 0) { /* there's nothing to add */ if (results) { for (int i=0; i<3; i++) { fs_rid_vector_free(results[i]); } free(results); } add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1); add_message(uc, "0 triples added, 0 removed", 0); return 0; } map_bnodes(uc, results[0]); map_bnodes(uc, results[1]); map_bnodes(uc, results[2]); fs_resource tores; tores.lex = to; tores.attr= FS_RID_NULL; tores.rid = torid; fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores); insert_triples(uc, torid, results[0], results[1], results[2]); add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1); add_message(uc, g_strdup_printf("%d triples added, 0 removed", results[0]->length), 1); for (int i=0; i<3; i++) { fs_rid_vector_free(results[i]); } free(results); return errors; }
/*
 * MINUS: return a new table with the rows of a that have no matching row
 * in b on the variables bound in both tables.
 *
 * Returns NULL when a is NULL and a copy of a when b is NULL. When the
 * two tables share no bound variable nothing can be removed, so a copy of
 * a is returned. Otherwise both inputs are sorted on the shared variables
 * (their sort flags and _ord columns are modified; the _ord columns are
 * emptied again before returning) and merged linearly using
 * binding_row_compare().
 */
fs_binding *fs_binding_minus(fs_query *q, fs_binding *a, fs_binding *b)
{
    if (a == NULL) {
        return NULL;
    }
    if (b == NULL) {
        /* a - 0 = a */
        return fs_binding_copy(a);
    }

    fs_binding *c = fs_binding_copy(a);
    int inter = 0;      /* do the tables intersect */

    for (int i=0; a[i].name; i++) {
        a[i].sort = 0;
        b[i].sort = 0;
        c[i].sort = 0;
        c[i].vals->length = 0;
    }
    for (int i=1; a[i].name; i++) {
        if (a[i].bound || b[i].bound) {
            c[i].bound = 1;
        }
        if (a[i].bound && b[i].bound) {
            inter = 1;
            a[i].sort = 1;
            b[i].sort = 1;
#ifdef DEBUG_MERGE
            printf("joining on %s\n", a[i].name);
#endif
        }
    }

    /* a and b bound variables do not intersect, so nothing is removed.
     * c has been emptied above, so hand back a fresh copy of a instead */
    if (!inter) {
        fs_binding_free(c);
        c = fs_binding_copy(a);
#ifdef DEBUG_MERGE
        printf("remove nothing, result:\n");
        fs_binding_print(c, stdout);
#endif
        return c;
    }

    int length_a = fs_binding_length(a);
    int length_b = fs_binding_length(b);

    /* sort the two sets of bindings so they can be merged linearly */
    fs_binding_sort(a);
    fs_binding_sort(b);

#ifdef DEBUG_MERGE
    printf("a: %d bindings\n", fs_binding_length(a));
    fs_binding_print(a, stdout);
    printf("b: %d bindings\n", fs_binding_length(b));
    fs_binding_print(b, stdout);
#endif

    int apos = 0;
    int bpos = 0;
    int cmp;
    while (apos < length_a) {
        cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b);
        if (cmp == -1 || cmp == -2) {
            /* A and B aren't compatible, keep A row */
            for (int col=0; a[col].name; col++) {
                if (!c[col].need_val) {
                    continue;
                } else if (a[col].bound) {
                    fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                } else {
                    fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                }
            }
            apos++;
        } else if (cmp == 0) {
            /* Both rows are equal (cmp == 0), skip A row in result */
#if DEBUG_MERGE > 1
            printf("[I] Ar=%d, Br=%d", apos, bpos);
#endif
            /* step over the whole run of equal rows on both sides */
            int range_a = apos+1;
            int range_b = bpos+1;
            while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++;
            while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++;
            apos = range_a;
            bpos = range_b;
        } else if (cmp == +1 || cmp == +2) {
            /* A and B aren't compatible, B sorts lower, skip B or B row is NULL */
            bpos++;
        } else {
            fs_error(LOG_ERR, "cmp=%d, value out of range", cmp);
            /* neither position advances in this case, so looping again
             * would repeat the same comparison forever */
            break;
        }
    }

    /* clear the _ord columns */
    a[0].vals->length = 0;
    b[0].vals->length = 0;

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(c));
    fs_binding_print(c, stdout);
#endif

    return c;
}
fs_binding *fs_binding_join(fs_query *q, fs_binding *a, fs_binding *b, fs_join_type join) { if (a == NULL) { return fs_binding_copy(b); } if (b == NULL) { return fs_binding_copy(a); } fs_binding *c = fs_binding_copy(a); int inter = 0; /* do the tables intersect */ for (int i=0; a[i].name; i++) { a[i].sort = 0; b[i].sort = 0; c[i].sort = 0; c[i].vals->length = 0; } int bound_a = 0; int bound_b = 0; for (int i=1; a[i].name; i++) { if (a[i].bound) bound_a++; if (b[i].bound) bound_b++; if (a[i].bound || b[i].bound) { c[i].bound = 1; } if (a[i].bound && b[i].bound) { inter = 1; a[i].sort = 1; b[i].sort = 1; #ifdef DEBUG_MERGE printf("joining on %s\n", a[i].name); #endif } } /* a and b bound variables do not intersect, we can just dump results */ if (!inter) { int length_a = fs_binding_length(a); int length_b = fs_binding_length(b); for (int i=1; a[i].name; i++) { if (!a[i].bound) { for (int j=0; j<length_a; j++) { fs_rid_vector_append(c[i].vals, FS_RID_NULL); } } else { fs_rid_vector_append_vector(c[i].vals, a[i].vals); } if (!b[i].bound) { for (int j=0; j<length_b; j++) { fs_rid_vector_append(c[i].vals, FS_RID_NULL); } } else { fs_rid_vector_append_vector(c[i].vals, b[i].vals); } } #ifdef DEBUG_MERGE printf("append all, result:\n"); fs_binding_print(c, stdout); #endif return c; } int length_a = fs_binding_length(a); int length_b = fs_binding_length(b); /* sort the two sets of bindings so they can be merged linearly */ fs_binding_sort(a); fs_binding_sort(b); #ifdef DEBUG_MERGE printf("a: %d bindings\n", fs_binding_length(a)); fs_binding_print(a, stdout); printf("b: %d bindings\n", fs_binding_length(b)); fs_binding_print(b, stdout); #endif /* If were running in restricted mode, truncate the binding tables */ if (q->flags & FS_QUERY_RESTRICTED) { int restricted = 0; fs_binding_truncate(a, q->soft_limit); if (length_a > fs_binding_length(a)) { length_a = fs_binding_length(a); restricted = 1; } fs_binding_truncate(b, q->soft_limit); if (length_b > fs_binding_length(b)) { 
length_b = fs_binding_length(b); restricted = 1; } if (restricted) { char *msg = "some results have been dropped to prevent overunning effort allocation"; q->warnings = g_slist_prepend(q->warnings, msg); } } int apos = 0; int bpos = 0; int cmp; while (apos < length_a) { if (join == FS_INNER && bpos >= length_b) break; cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b); if (cmp == -1) { /* A and B aren't compatible, A sorts lower, skip A or left join */ #if DEBUG_MERGE > 1 printf("[L] Ar=%d, Br=%d", apos, bpos); #endif if (join == FS_LEFT) { for (int col=0; a[col].name; col++) { if (!c[col].need_val) { continue; } else if (a[col].bound) { #if DEBUG_MERGE > 1 printf(" %s=%016llx", c[col].name, table_value(a, col, apos)); #endif fs_rid_vector_append(c[col].vals, table_value(a, col, apos)); } else { #if DEBUG_MERGE > 1 printf(" %s=null", c[col].name); #endif fs_rid_vector_append(c[col].vals, FS_RID_NULL); } } } apos++; } else if (cmp == 0 || cmp == -2 || cmp == 2) { /* Both rows are equal (cmp == 0), or one row is null (cmp == -2, 2) */ /* Both rows match, find out what combinations bind and produce them */ #if DEBUG_MERGE > 1 printf("[I] Ar=%d, Br=%d", apos, bpos); #endif int range_a = apos+1; int range_b = bpos+1; while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++; while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++; int start_a = apos; int start_b = bpos; for (apos = start_a; apos<range_a; apos++) { for (bpos = start_b; bpos<range_b; bpos++) { for (int col=0; a[col].name; col++) { if (!c[col].need_val) { continue; } else if (!a[col].bound && !b[col].bound) { #if DEBUG_MERGE > 1 printf(" %s=null", c[col].name); #endif fs_rid_vector_append(c[col].vals, FS_RID_NULL); } else if (a[col].bound) { /* if were left joining and A is NULL, we want the * value from B */ if (join == FS_LEFT && table_value(a, col, apos) == FS_RID_NULL && b[col].bound) { #if DEBUG_MERGE > 1 printf(" %s=%016llx", 
c[col].name, table_value(b, col, bpos)); #endif fs_rid_vector_append(c[col].vals, table_value(b, col, bpos)); } else { #if DEBUG_MERGE > 1 printf(" %s=%016llx", c[col].name, table_value(a, col, apos)); #endif fs_rid_vector_append(c[col].vals, table_value(a, col, apos)); } } else { #if DEBUG_MERGE > 1 printf(" %s=%016llx", c[col].name, table_value(b, col, bpos)); #endif fs_rid_vector_append(c[col].vals, table_value(b, col, bpos)); } } } } /* this is actually unneccesary because the for loop will do the * same thing, but it's clearer */ apos = range_a; bpos = range_b; } else if (cmp == +1) { /* A and B aren't compatible, B sorts lower, skip B */ bpos++; } else { fs_error(LOG_ERR, "cmp=%d, value out of range", cmp); } #if DEBUG_MERGE > 1 printf("\n"); #endif } /* clear the _ord columns */ a[0].vals->length = 0; b[0].vals->length = 0; #ifdef DEBUG_MERGE printf("result: %d bindings\n", fs_binding_length(c)); fs_binding_print(c, stdout); #endif return c; }
xmlChar *get_uri(fsp_link *link, fs_rid rid) { if (cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); return (xmlChar *) resource.lex; } xmlChar *get_attr(fsp_link *link, fs_rid rid) { if (attr_cache[rid & CACHE_MASK].rid == rid) { return (xmlChar *) attr_cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); memcpy(&attr_cache[rid & ATTR_CACHE_MASK], &resource, sizeof(fs_resource)); return (xmlChar *) resource.lex; } xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr) { if (cache[rid & CACHE_MASK].rid == rid) { *attr = cache[rid & CACHE_MASK].attr; return (xmlChar *) cache[rid & CACHE_MASK].lex; } fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid }; fs_resource resource; fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource); *attr = resource.attr; return (xmlChar *) resource.lex; } void resolve_triples(fsp_link *link, fs_rid_vector **rids) { int quads = rids[0]->length; fs_rid_vector *todo[segments]; fs_segment segment; for (segment = 0; segment < segments; ++segment) { todo[segment] = fs_rid_vector_new(0); } for (int c = 0; c < 3; ++c) { for (int k = 0; k < quads; ++k) { const fs_rid rid = rids[c]->data[k]; if (FS_IS_BNODE(rid) || cache[rid & CACHE_MASK].rid == rid) continue; fs_rid_vector_append(todo[FS_RID_SEGMENT(rid, segments)], rid); cache[rid & CACHE_MASK].rid = rid; /* well, it will be soon */ } } int length[segments]; fs_resource *resources[segments]; for (segment = 0; segment < segments; ++segment) { length[segment] = todo[segment]->length; resources[segment] = calloc(length[segment], sizeof(fs_resource)); } fsp_resolve_all(link, todo, resources); for (segment = 0; segment < segments; ++segment) 
{ fs_resource *res = resources[segment]; for (int k = 0; k < length[segment]; ++k) { free(cache[res[k].rid & CACHE_MASK].lex); memcpy(&cache[res[k].rid & CACHE_MASK], &res[k], sizeof(fs_resource)); } fs_rid_vector_free(todo[segment]); free(resources[segment]); } } void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml) { fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fs_rid_vector one = { .length = 1, .size = 1, .data = &model }; fs_rid_vector **results; double then; /* for time keeping */ then = fs_time(); fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT); time_bind_first += (fs_time() - then); while (results != NULL) { long length = results[0]->length; if (length == 0) break; then = fs_time(); resolve_triples(link, results); time_resolving += (fs_time() - then); then = fs_time(); for (int k = 0; k < length; ++k) { xmlTextWriterStartElement(xml, (xmlChar *) "triple"); for (int r = 0; r < 3; ++r) { fs_rid rid = results[r]->data[k]; if (FS_IS_BNODE(rid)) { unsigned long long node = FS_BNODE_NUM(rid); xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node); } else if (FS_IS_URI(rid)) { xmlChar *uri = get_uri(link, rid); xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri); } else if (FS_IS_LITERAL(rid)) { fs_rid attr; xmlChar *lex = get_literal(link, rid, &attr); if (attr == fs_c.empty) { xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex); } else if (FS_IS_URI(attr)) { xmlChar *type = get_uri(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral"); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type); xmlTextWriterEndElement(xml); } else if (FS_IS_LITERAL(attr)) { xmlChar *lang = get_attr(link, attr); xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral"); xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang); xmlTextWriterWriteString(xml, (xmlChar *) lex); xmlTextWriterEndElement(xml); } } } 
xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); } time_write_out += (fs_time() - then); fs_rid_vector_free(results[0]); fs_rid_vector_free(results[1]); fs_rid_vector_free(results[2]); free(results); then = fs_time(); fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT); time_bind_next += (fs_time() - then); } fsp_bind_done_all(link); } void dump_trix(fsp_link *link, xmlTextWriterPtr xml) { fs_rid_vector **models; fs_rid_vector none = { .length = 0, .size = 0, .data = 0 }; fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models); fs_rid_vector_sort(models[0]); fs_rid_vector_uniq(models[0], 1); long length = models[0]->length; for (int k = 0; k < length; ++k) { fs_rid model = models[0]->data[k]; xmlChar *model_uri = get_uri(link, model); xmlTextWriterStartElement(xml, (xmlChar *) "graph"); if (FS_IS_URI(model)) { xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri); } else { fs_error(LOG_WARNING, "model %lld is not a URI", model); } dump_model(link, model, xml); xmlTextWriterEndElement(xml); xmlTextWriterWriteString(xml, (xmlChar *) "\n"); printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out); } } void dump_file(fsp_link *link, char *filename) { xmlTextWriterPtr xml = xmlNewTextWriterFilename(filename, TRUE); if (!xml) { fs_error(LOG_ERR, "Couldn't write output file, giving up"); exit(4); } xmlTextWriterStartDocument(xml, NULL, NULL, NULL); xmlTextWriterStartElement(xml, (xmlChar *) "TriX"); dump_trix(link, xml); xmlTextWriterEndDocument(xml); /* also closes TriX */ xmlFreeTextWriter(xml); } int main(int argc, char *argv[]) { char *password = fsp_argv_password(&argc, argv); if (argc != 3) { fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]); exit(1); } fsp_link *link = fsp_open_link(argv[1], password, FS_OPEN_HINT_RO); if (!link) { 
fs_error (LOG_ERR, "couldn't connect to “%s”", argv[1]); exit(2); } fs_hash_init(fsp_hash_type(link)); segments = fsp_link_segments(link); dump_file(link, argv[2]); fsp_close_link(link); }
int main(int argc, char *argv[]) { int verbosity = 0; int dryrun = 0; char *password = NULL; char *format = "auto"; FILE *msg = stderr; char *optstring = "am:M:vnf:"; int c, opt_index = 0, help = 0; int files = 0, adding = 0; char *kb_name = NULL; char *model[argc], *uri[argc]; char *model_default = NULL; password = fsp_argv_password(&argc, argv); static struct option long_options[] = { { "add", 0, 0, 'a' }, { "model", 1, 0, 'm' }, { "model-default", 1, 0, 'M' }, { "verbose", 0, 0, 'v' }, { "dryrun", 0, 0, 'n' }, { "no-resources", 0, 0, 'R' }, { "no-quads", 0, 0, 'Q' }, { "format", 1, 0, 'f' }, { "help", 0, 0, 'h' }, { "version", 0, 0, 'V' }, { 0, 0, 0, 0 } }; for (int i= 0; i < argc; ++i) { model[i] = NULL; } int help_return = 1; while ((c = getopt_long (argc, argv, optstring, long_options, &opt_index)) != -1) { if (c == 'm') { model[files++] = optarg; } else if (c == 'M') { model_default = optarg; } else if (c == 'v') { verbosity++; } else if (c == 'a') { adding = 1; } else if (c == 'n') { dryrun |= FS_DRYRUN_DELETE | FS_DRYRUN_RESOURCES | FS_DRYRUN_QUADS; } else if (c == 'R') { dryrun |= FS_DRYRUN_RESOURCES; } else if (c == 'Q') { dryrun |= FS_DRYRUN_QUADS; } else if (c == 'f') { format = optarg; } else if (c == 'h') { help = 1; help_return = 0; } else if (c == 'V') { printf("%s, built for 4store %s\n", argv[0], GIT_REV); exit(0); } else { help = 1; } } if (verbosity > 0) { if (dryrun & FS_DRYRUN_DELETE) { printf("warning: not deleting old model\n"); } if (dryrun & FS_DRYRUN_RESOURCES) { printf("warning: not importing resource nodes\n"); } if (dryrun & FS_DRYRUN_QUADS) { printf("warning: not importing quad graph\n"); } } files = 0; for (int k = optind; k < argc; ++k) { if (!kb_name) { kb_name = argv[k]; } else { if (strchr(argv[k], ':')) { uri[files] = g_strdup(argv[k]); } else { uri[files] = (char *)raptor_uri_filename_to_uri_string(argv[k]); } if (!model[files]) { if (!model_default) { model[files] = uri[files]; } else { model[files] = model_default; } } 
files++; } } raptor_world *rw = raptor_new_world(); if (help || !kb_name || files == 0) { fprintf(stdout, "%s revision %s\n", argv[0], FS_FRONTEND_VER); fprintf(stdout, "Usage: %s <kbname> <rdf file/URI> ...\n", argv[0]); fprintf(stdout, " -v --verbose increase verbosity (can repeat)\n"); fprintf(stdout, " -a --add add data to models instead of replacing\n"); fprintf(stdout, " -m --model specify a model URI for the next RDF file\n"); fprintf(stdout, " -M --model-default specify a model URI for all RDF files\n"); fprintf(stdout, " -f --format specify an RDF syntax for the import\n"); fprintf(stdout, "\n available formats are:\n"); for (unsigned int i=0; 1; i++) { const raptor_syntax_description *desc = raptor_world_get_parser_description(rw, i); if (!desc) { break; } fprintf(stdout, " %12s - %s\n", desc->names[0], desc->label); } exit(help_return); } fsp_syslog_enable(); fsplink = fsp_open_link(kb_name, password, FS_OPEN_HINT_RW); if (!fsplink) { fs_error (LOG_ERR, "couldn't connect to “%s”", kb_name); exit(2); } const char *features = fsp_link_features(fsplink); int has_o_index = !(strstr(features, "no-o-index")); /* tweak */ fs_hash_init(fsp_hash_type(fsplink)); const int segments = fsp_link_segments(fsplink); int total_triples = 0; fs_import_timing timing[segments]; for (int seg = 0; seg < segments; seg++) { fsp_get_import_times(fsplink, seg, &timing[seg]); } gettimeofday(&then, 0); if (fsp_start_import_all(fsplink)) { fs_error(LOG_ERR, "aborting import"); exit(3); } #if 0 printf("press enter\n"); char foo; read(0, &foo, 1); #endif fs_rid_vector *mvec = fs_rid_vector_new(0); for (int f= 0; f < files; ++f) { fs_rid muri = fs_hash_uri(model[f]); fs_rid_vector_append(mvec, muri); } if (!adding) { if (verbosity) { printf("removing old data\n"); fflush(stdout); } if (!(dryrun & FS_DRYRUN_DELETE)) { if (fsp_delete_model_all(fsplink, mvec)) { fs_error(LOG_ERR, "model delete failed"); return 1; } for (int i=0; i<mvec->length; i++) { if (mvec->data[i] == 
fs_c.system_config) { fs_import_reread_config(); } } } fsp_new_model_all(fsplink, mvec); } fs_rid_vector_free(mvec); gettimeofday(&then_last, 0); for (int f = 0; f < files; ++f) { if (verbosity) { printf("Reading <%s>\n", uri[f]); if (strcmp(uri[f], model[f])) { printf(" into <%s>\n", model[f]); } fflush(stdout); } fs_import(fsplink, model[f], uri[f], format, verbosity, dryrun, has_o_index, msg, &total_triples); if (verbosity) { fflush(stdout); } } double sthen = fs_time(); int ret = fs_import_commit(fsplink, verbosity, dryrun, has_o_index, msg, &total_triples); if (verbosity > 0) { printf("Updating index\n"); fflush(stdout); } fsp_stop_import_all(fsplink); if (verbosity > 0) { printf("Index update took %f seconds\n", fs_time()-sthen); } if (!ret) { gettimeofday(&now, 0); double diff = (now.tv_sec - then.tv_sec) + (now.tv_usec - then.tv_usec) * 0.000001; if (verbosity && total_triples > 0) { printf("Imported %d triples, average %d triples/s\n", total_triples, (int)((double)total_triples/diff)); fflush(stdout); } } if (verbosity > 1) { printf("seg add_q\tadd_r\t\tcommit_q\tcommit_r\tremove\t\trebuild\t\twrite\n"); long long *tics = fsp_profile_write(fsplink); for (int seg = 0; seg < segments; seg++) { fs_import_timing newtimes; fsp_get_import_times(fsplink, seg, &newtimes); printf("%2d: %f\t%f\t%f\t%f\t%f\t%f\t%f\n", seg, newtimes.add_s - timing[seg].add_s, newtimes.add_r - timing[seg].add_r, newtimes.commit_q - timing[seg].commit_q, newtimes.commit_r - timing[seg].commit_r, newtimes.remove - timing[seg].remove, newtimes.rebuild - timing[seg].rebuild, tics[seg] * 0.001); } } fsp_close_link(fsplink); raptor_free_world(rw); return 0; }
/* return to = from [X] to, this is used to perform joins inside blocks, it
 * saves allocations by doing most operations inplace, unlike fs_binding_join
 *
 * q:     query the binding tables belong to; its flags and soft_limit are
 *        read, and a warning is prepended to q->warnings when rows are
 *        dropped in restricted mode
 * block: not used by this function
 * from:  binding table merged into 'to'; its sort flags are reset, it may be
 *        sorted/truncated, and in the "no intersection" case unbound columns
 *        may be given freshly allocated value vectors
 * to:    binding table that receives the result (modified in place)
 *
 * Both tables are arrays terminated by an entry with a NULL name; column 0
 * is the row-order (_ord) column and is skipped when looking for join
 * columns. */
void fs_binding_merge(fs_query *q, int block, fs_binding *from, fs_binding *to)
{
    fs_binding *inter_f = NULL; /* the intersecting column */
    fs_binding *inter_t = NULL; /* the intersecting column */

    for (int i=0; from[i].name; i++) {
        from[i].sort = 0;
        to[i].sort = 0;
    }

    /* every column bound on both sides becomes a sort (join) column; the
     * last such column is remembered as the intersecting one */
    for (int i=1; from[i].name; i++) {
        if (!from[i].bound || !to[i].bound) continue;

        inter_f = from+i;
        inter_t = to+i;
        from[i].sort = 1;
        to[i].sort = 1;
#ifdef DEBUG_MERGE
        printf("@@ join on %s\n", to[i].name);
#endif
    }

    /* from and to bound variables do not intersect, we can just dump results,
       under some circumstances we need to do a combinatorial explosion */
    if (!inter_f && (fs_binding_length(from) == 0)) {
        /* length_f is 0 here by the condition above, so the padding loop
         * over 'from' below never writes anything */
        const int length_f = fs_binding_length(from);
        const int length_t = fs_binding_length(to);
        for (int i=1; from[i].name; i++) {
            if (to[i].bound && !from[i].bound) {
                if (from[i].vals) {
                    fs_rid_vector_free(from[i].vals);
                }
                from[i].vals = fs_rid_vector_new(length_f);
                for (int d=0; d<length_f; d++) {
                    from[i].vals->data[d] = FS_RID_NULL;
                }
                from[i].bound = 1;
            }
            if (!from[i].bound) continue;
            if (!to[i].bound) {
                /* pad the newly bound column with NULLs for existing rows */
                if (to[i].vals) {
                    fs_rid_vector_free(to[i].vals);
                }
                to[i].vals = fs_rid_vector_new(length_t);
                for (int d=0; d<length_t; d++) {
                    to[i].vals->data[d] = FS_RID_NULL;
                }
            }
            fs_rid_vector_append_vector(to[i].vals, from[i].vals);
            to[i].bound = 1;
        }
#ifdef DEBUG_MERGE
        printf("append all, result:\n");
        fs_binding_print(to, stdout);
#endif

        return;
    }

    /* If we're running in restricted mode, truncate the binding tables */
    if (q->flags & FS_QUERY_RESTRICTED) {
        fs_binding_truncate(from, q->soft_limit);
        fs_binding_truncate(to, q->soft_limit);
    }

    int length_t = fs_binding_length(to);
    int length_f = fs_binding_length(from);

    /* ms8: this list keeps track of the vars to replace */
    GList *rep_list = NULL;
    for (int i=1; to[i].name; i++) {
        if (to+i == inter_t || to[i].used || to[i].bound) {
            /* do nothing */
#if DEBUG_MERGE > 1
            printf("@@ preserve %s\n", to[i].name);
#endif
        } else if (from[i].bound && !to[i].bound) {
#if DEBUG_MERGE > 1
            printf("@@ replace %s\n", from[i].name);
#endif
            to[i].bound = 1;
            /* make sure the column has exactly length_t rows ... */
            if (to[i].vals) {
                if (to[i].vals->length != length_t) {
                    fs_rid_vector_free(to[i].vals);
                    to[i].vals = fs_rid_vector_new(length_t);
                }
            } else {
                to[i].vals = fs_rid_vector_new(length_t);
            }
            /* ... all NULL until the merge loop fills them in */
            for (int d=0; d<length_t; d++) {
                to[i].vals->data[d] = FS_RID_NULL;
            }
            rep_list = g_list_append(rep_list, GINT_TO_POINTER(i));
        }
    }

    /* sort the two sets of bindings so they can be merged linearly */
    if (inter_f) {
        fs_binding_sort(from);
        fs_binding_sort(to);
    } else {
        /* make sure the tables are not marked sorted */
        from[0].vals->length = 0;
        to[0].vals->length = 0;
    }

#ifdef DEBUG_MERGE
    printf("old: %d bindings\n", fs_binding_length(from));
    fs_binding_print(from, stdout);
    printf("new: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif

    int fpos = 0;
    int tpos = 0;
    while (fpos < length_f || tpos < length_t) {
        if (q->flags & FS_QUERY_RESTRICTED &&
            fs_binding_length(to) >= q->soft_limit) {
            char *msg = g_strdup("some results have been dropped to prevent overunning time allocation");
            q->warnings = g_slist_prepend(q->warnings, msg);
            break;
        }
        int cmp;
        cmp = binding_row_compare(q, from, to, fpos, tpos, length_f, length_t);
        if (cmp == 0) {
            /* both rows match */
            int fp, tp = tpos;
            /* walk the run of 'from' rows that compare equal to row tpos,
             * and for each of them the run of matching 'to' rows */
            for (fp = fpos; binding_row_compare(q, from, to, fp, tpos, length_f, length_t) == 0; fp++) {
#if DEBUG_MERGE > 1
                if (fp == DEBUG_CUTOFF) {
                    printf("...\n");
                }
#endif
                for (tp = tpos; 1; tp++) {
                    if (binding_row_compare(q, from, to, fp, tp, length_f, length_t) == 0) {
#if DEBUG_MERGE > 1
                        if (fp < DEBUG_CUTOFF) {
                            printf("STEP %d, %d ", fp-fpos, tp-tpos);
                        }
#endif
                        if (fp == fpos) {
                            /* first matching 'from' row: overwrite the
                             * existing 'to' row in place */
#if DEBUG_MERGE > 1
                            if (fp < DEBUG_CUTOFF) {
                                if (inter_f) {
                                    printf("REPL %llx\n", inter_f->vals->data[fp]);
                                } else {
                                    printf("REPL ???\n");
                                }
                            }
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && table_value(from, c, fp) == FS_RID_NULL) {
                                    continue;
                                }
                                if (from[c].bound && fp < from[c].vals->length) {
                                    /* when the _ord column is populated it
                                     * maps the sorted position to the
                                     * physical row */
                                    long wrow = to[0].vals->length ? to[0].vals->data[tp] : tp;
                                    to[c].vals->data[wrow] = table_value(from, c, fp);
                                    if (to[c].vals->length <= tp) {
                                        to[c].vals->length = tp+1;
                                    }
                                }
                            }
                        } else {
                            /* further matching 'from' rows: append a new
                             * row to 'to' */
#if DEBUG_MERGE > 1
                            if (fp < DEBUG_CUTOFF) {
                                printf("ADD\n");
                            }
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && fp < from[c].vals->length) {
                                    fs_rid_vector_append(to[c].vals, table_value(from, c, fp));
                                } else {
                                    fs_rid_vector_append(to[c].vals, table_value(to, c, tp));
                                }
                            }
                        }
                    } else {
                        break;
                    }
                }
            }
            /* continue after both runs */
            tpos = tp;
            fpos = fp;
        } else if (cmp <= -1) {
            fpos++;
        } else if (cmp >= 1) {
            tpos++;
        } else {
            fs_error(LOG_CRIT, "unknown compare state %d in binding", cmp);
        }
    }

    /* clear the _ord columns */
    from[0].vals->length = 0;
    to[0].vals->length = 0;

    /* ms8: INIT code to clean up rows that were not replaced */
    if (rep_list) {
        /* rows whose bit is cleared below are dropped; this relies on
         * fs_new_bit_array() returning an array with every bit set
         * (NOTE(review): confirm against the bit array implementation) */
        unsigned char *to_del = fs_new_bit_array(length_t);
        int to_del_count = 0;

        /* iterate with a cursor so the list head stays available for
         * g_list_free() afterwards */
        for (GList *it = rep_list; it; it = g_list_next(it)) {
            int col_r = GPOINTER_TO_INT(it->data);
            for (int d=0; d<length_t; d++) {
                if (to[col_r].vals->data[d] == FS_RID_NULL) {
                    fs_bit_array_set(to_del, d, 0);
                    to_del_count++;
                }
            }
        }
        g_list_free(rep_list);

        if (to_del_count) {
            int vars = 0;
            for (int i=1; to[i].name; i++)
                vars++;

            /* rebuild every column from the rows that are kept */
            fs_rid_vector **clean = calloc(vars, sizeof(fs_rid_vector *));
            for (int i=0;i<vars;i++)
                clean[i] = fs_rid_vector_new(0);
            for (int d = 0;d<length_t;d++) {
                if (fs_bit_array_get(to_del,d)) {
                    for (int i=0;i<vars;i++) {
                        fs_rid_vector_append(clean[i],to[i+1].vals->data[d]);
                    }
                }
            }
            /* hand the rebuilt data over to the existing vector objects;
             * only the temporary vector shells are freed */
            for (int i=1;i<=vars;i++) {
                free(to[i].vals->data);
                to[i].vals->data = clean[i-1]->data;
                to[i].vals->length = clean[i-1]->length;
                to[i].vals->size = clean[i-1]->size;
                free(clean[i-1]);
            }
            free(clean);
        }
        fs_bit_array_destroy(to_del);
    }
    /* ms8: END code to clean up rows that were not replaced */

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif
}