Example #1
0
END_TEST


START_TEST (check_fs_rid_vector_uniq)
{
  /* Exercises fs_rid_vector_uniq() both with and without NULL removal. */
  fs_rid_vector *v = fs_rid_vector_new(0);
  fail_unless(v != NULL, "fs_rid_vector is NULL");
  int i=0;
  /* every value in [100,500) is appended twice, then a single FS_RID_NULL */
  for (i=100; i < 500; i++) {
      fs_rid_vector_append(v, i);
      fs_rid_vector_append(v, i);
  }
  fs_rid_vector_append(v,FS_RID_NULL);
  fail_if(fs_rid_vector_length(v) != (((500 - 100)*2) + 1), "fs_rid_vector_length failed");
  fs_rid_vector_uniq(v, 0); //Not remove nulls
  fail_if(fs_rid_vector_length(v) != ((500 - 100) + 1), "fs_rid_vector_length failed");
  for (i=1; i < fs_rid_vector_length(v) - 1; i++) {
      /* rids are 64 bit wide, so they are printed with %llx like the
       * other messages in this test rather than %d */
      fail_if(v->data[i] == v->data[i-1], "unique values failed [%llx %llx]",v->data[i],v->data[i-1]);
  }
  fail_if(v->data[fs_rid_vector_length(v)-1] != FS_RID_NULL, "FS_RID_NULL isn't there.");
  /* plant a second NULL in the middle, then ask for NULLs to be dropped */
  v->data[123] = FS_RID_NULL;
  fs_rid_vector_uniq(v, 1); //remove nulls
  fail_if(fs_rid_vector_length(v) != ((500 - 100) -1), "fs_rid_vector_length failed");
  fail_if(v->data[fs_rid_vector_length(v)-1] == FS_RID_NULL, "FS_RID_NULL should not be there %llx",
          v->data[fs_rid_vector_length(v)-1]);
  /* report the element that was actually tested */
  fail_if(v->data[123] == FS_RID_NULL, "FS_RID_NULL should not be there %llx",
          v->data[123]);
  fs_rid_vector_free(v);
}
Example #2
0
/* Queues one quad for deletion and flushes the queue when it grows large.
 *
 * vec[0..3] collect the model, subject, predicate and object rids.
 * Returns 1 without queueing anything when hashing the origin, subject,
 * predicate or object yields FS_RID_NULL, and 0 otherwise. */
static int delete_rasqal_triple(struct update_context *ct, fs_rid_vector *vec[], rasqal_triple *triple, int row)
{
    fs_rid m, s, p, o;

    if (triple->origin) {
        m = fs_hash_rasqal_literal(ct, triple->origin, row);
        if (m == FS_RID_NULL) return 1;
    } else if (ct->op->graph_uri) {
        m = fs_hash_uri((char *)raptor_uri_as_string(ct->op->graph_uri));
    } else {
        /* m can be wildcard in the absence of GRAPH, WITH etc. */
        m = FS_RID_NULL;
    }
    s = fs_hash_rasqal_literal(ct, triple->subject, row);
    if (s == FS_RID_NULL) return 1;
    p = fs_hash_rasqal_literal(ct, triple->predicate, row);
    if (p == FS_RID_NULL) return 1;
    o = fs_hash_rasqal_literal(ct, triple->object, row);
    if (o == FS_RID_NULL) return 1;

    /* as long as s, p, and o are bound, we can queue this quad for deletion */
    fs_rid_vector_append(vec[0], m);
    fs_rid_vector_append(vec[1], s);
    fs_rid_vector_append(vec[2], p);
    fs_rid_vector_append(vec[3], o);

    /* flush once more than 999 quads are pending */
    if (fs_rid_vector_length(vec[0]) > 999) {
        fsp_delete_quads_all(ct->link, vec);
        /* the index is named "slot" so it no longer shadows the subject rid s */
        for (int slot=0; slot<4; slot++) {
            fs_rid_vector_truncate(vec[slot], 0);
        }
    }

    return 0;
}
Example #3
0
/* Appends the requested parts of quad to consecutive result columns.
 *
 * quad holds model, subject, predicate, object in that order. A part is
 * emitted only when its FS_BIND_* flag is set in tobind, and the emitted
 * parts fill ret[0], ret[1], ... without gaps. */
static void bind_results(const fs_rid quad[4], int tobind, fs_rid_vector **ret)
{
    const int part_flag[4] = {
        FS_BIND_MODEL, FS_BIND_SUBJECT, FS_BIND_PREDICATE, FS_BIND_OBJECT
    };
    int out = 0;

    for (int part = 0; part < 4; part++) {
        if (tobind & part_flag[part]) {
            fs_rid_vector_append(ret[out], quad[part]);
            out++;
        }
    }
}
Example #4
0
/* Appends to v every entry of v2 that is neither FS_RID_NULL nor a literal
 * (as judged by FS_IS_LITERAL). A NULL v2 is treated as empty. */
void fs_rid_vector_append_vector_no_nulls_lit(fs_rid_vector *v, fs_rid_vector *v2)
{
    if (v2 == NULL) {
        return;
    }

    for (int idx = 0; idx < v2->length; idx++) {
        const fs_rid rid = v2->data[idx];

        if (rid == FS_RID_NULL || FS_IS_LITERAL(rid)) {
            continue;
        }
        fs_rid_vector_append(v, rid);
    }
}
Example #5
0
/* UNION b onto a: the rows of b are appended to a in place.
 *
 * Column 0 of a is emptied. For every later column present in both tables,
 * a column that is bound on only one side is first marked bound on the
 * other side and padded there with FS_RID_NULL up to that table's original
 * row count, then b's values are appended to a's. q is not used. */
void fs_binding_union(fs_query *q, fs_binding *a, fs_binding *b)
{
    const int rows_a = fs_binding_length(a);
    const int rows_b = fs_binding_length(b);

    a[0].vals->length = 0;

    for (int col = 1; a[col].name != NULL && b[col].name != NULL; col++) {
        fs_binding *left = &a[col];
        fs_binding *right = &b[col];

        if (right->bound && !left->bound) {
            left->bound = 1;
            while (left->vals->length < rows_a) {
                fs_rid_vector_append(left->vals, FS_RID_NULL);
            }
        } else if (left->bound && !right->bound) {
            right->bound = 1;
            while (right->vals->length < rows_b) {
                fs_rid_vector_append(right->vals, FS_RID_NULL);
            }
        }

        fs_rid_vector_append_vector(left->vals, right->vals);
    }
}
Example #6
0
/* Sorts a binding table indirectly: the rows themselves stay put and
 * column 0 is rewritten to hold the row numbers in sorted order.
 *
 * Columns shorter than the table length are padded with FS_RID_NULL first.
 * If no column has its sort flag set a warning is logged and nothing is
 * sorted. */
void fs_binding_sort(fs_binding *b)
{
    const int rows = fs_binding_length(b);
    int sort_columns = 0;

    for (int col = 0; b[col].name; col++) {
        if (b[col].sort) {
            sort_columns++;
        }
        /* pad the column so every row index is valid */
        for (int n = b[col].vals->length; n < rows; n++) {
            fs_rid_vector_append(b[col].vals, FS_RID_NULL);
        }
    }

    if (sort_columns == 0) {
        fs_error(LOG_WARNING, "fs_binding_sort() called with no sort "
                              "columns set, ignoring");

        return;
    }

    /* column 0 becomes the identity permutation 0 .. rows-1 */
    b[0].vals->length = 0;
    for (int row = 0; row < rows; row++) {
        fs_rid_vector_append(b[0].vals, row);
    }

    /* zero or one row is already in order */
    if (rows <= 1) {
        return;
    }

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif

    /* the context hands the table to the comparison callback */
    struct sort_context ctxt = { b };
    fs_qsort_r(b[0].vals->data, rows, sizeof(fs_rid), qsort_r_cmp, &ctxt);

#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("sort took %f seconds\n", now - then);
#endif
}
Example #7
0
/* Collects the integer values of every metadata entry whose key equals prop.
 *
 * Returns a newly created vector (possibly empty) holding the values in
 * entry order. */
fs_rid_vector *fs_metadata_get_int_vector(fs_metadata *m, const char *prop)
{
    fs_rid_vector *rv = fs_rid_vector_new(0);

    for (int e=0; e < m->length; e++) {
        if (strcmp(m->entries[e].key, prop) == 0) {
            /* strtoll parses exactly like atoll for representable values
             * but has defined behaviour when the text is out of range */
            fs_rid_vector_append(rv, strtoll(m->entries[e].val, NULL, 10));
        }
    }

    return rv;
}
Example #8
0
/* Appends every non-FS_RID_NULL value stored in set s to vector v.
 * Each of the FS_RID_SET_ENTRIES buckets is walked along its next chain.
 * A NULL set is treated as empty. */
void fs_rid_vector_append_set(fs_rid_vector *v, fs_rid_set *s)
{
    if (s == NULL) {
        return;
    }

    for (int bucket = 0; bucket < FS_RID_SET_ENTRIES; bucket++) {
        struct rid_entry *node = &(s->entry[bucket]);

        while (node != NULL) {
            if (node->val != FS_RID_NULL) {
                fs_rid_vector_append(v, node->val);
            }
            node = node->next;
        }
    }
}
Example #9
0
/* Builds a new vector holding the entries of rv[0] that inter_sub()
 * accepts when checked against the vectors from index 1 onwards.
 *
 * count is the number of vectors in rv. With no vectors at all, or a NULL
 * first vector, an empty vector is returned instead of reading rv[0]. */
fs_rid_vector *fs_rid_vector_intersect(int count, const fs_rid_vector *rv[])
{
    fs_rid_vector *ret = fs_rid_vector_new(0);

    if (count < 1 || !rv || !rv[0]) {
        return ret;
    }

    for (int i=0; i<rv[0]->length; i++) {
        if (inter_sub(count, 1, rv, rv[0]->data[i])) {
            fs_rid_vector_append(ret, rv[0]->data[i]);
        }
    }

    return ret;
}
Example #10
0
END_TEST


START_TEST (check_fs_rid_vector_sort)
{
  /* Fills a vector with deliberately unordered values, sorts it and checks
   * length, membership and ordering. */
  /* offsets from i that get appended for odd and for even i */
  static const int odd_offsets[3]  = { 0, 3, -10 };
  static const int even_offsets[3] = { -1, 5, 0 };
  const int first = 100;
  const int limit = 10000;

  fs_rid_vector *v = fs_rid_vector_new(0);
  fail_unless(v != NULL, "fs_rid_vector is NULL");

  int i;
  for (i = first; i < limit; i++) {
      const int *offsets = (i % 2) ? odd_offsets : even_offsets;
      for (int k = 0; k < 3; k++) {
          fs_rid_vector_append(v, i + offsets[k]);
      }
  }

  fs_rid_vector_sort(v);

  fail_if(fs_rid_vector_length(v) != (limit - first) * 3, "Length fail after sort");
  for (i = first; i < limit; i++) {
      fail_if ( !fs_rid_vector_contains(v, i), "Contain failed after sort");
  }
  /* every neighbouring pair must be in non-decreasing order */
  for (i = 0; i < fs_rid_vector_length(v) - 1; i++) {
      fail_if (v->data[i] > v->data[i+1], "Sort does not match.");
  }
  fs_rid_vector_free(v);
}
Example #11
0
/* Returns a filtered copy of binding table b.
 *
 * With constr == NULL the copy is returned unchanged. Otherwise every
 * column of the copy is emptied and each row of b is evaluated against
 * each non-NULL expression in constr; the bound columns of the row are
 * appended to the result for every expression whose effective boolean
 * value is true. While evaluating, q->bt is pointed at b and restored
 * afterwards. Lexical error values are prepended to q->warnings.
 *
 * NOTE(review): because the row copy sits inside the expression loop, a row
 * is appended once per passing expression and is kept when any single
 * expression passes - confirm that constr is expected to hold at most one
 * live expression, otherwise rows may be duplicated. */
fs_binding *fs_binding_apply_filters(fs_query *q, int block, fs_binding *b, raptor_sequence *constr)
{
    fs_binding *ret = fs_binding_copy(b);
    if (!constr) {
        /* if there are no constraints then we don't need to do anything */

        return ret;
    }
    /* start from an empty result; rows that pass are appended below */
    for (int col=0; b[col].name; col++) {
        ret[col].vals->length = 0;
    }
    int length = fs_binding_length(b);
    /* expression evaluation reads the table through q->bt, so swap b in */
    fs_binding *restore = q->bt;
    q->bt = b;
    /* TODO should prefetch lexical vals here */
    /* expressions that have been optimised out will be replaced with NULL,
     * so we have to be careful here */
/* --------------------------- */
/* PREFETCH should go here XXX */
/* --------------------------- */
    for (int row=0; row<length; row++) {
        for (int c=0; c<raptor_sequence_size(constr); c++) {
            rasqal_expression *e =
                raptor_sequence_get_at(constr, c);
            /* skip slots whose expression was optimised away */
            if (!e) continue;

            fs_value v = fs_expression_eval(q, row, block, e);
#ifdef DEBUG_FILTER
            rasqal_expression_print(e, stdout);
            printf(" -> ");
            fs_value_print(v);
            printf("\n");
#endif
            /* an error value that carries text is reported as a warning */
            if (v.valid & fs_valid_bit(FS_V_TYPE_ERROR) && v.lex) {
                q->warnings = g_slist_prepend(q->warnings, v.lex);
            }
            fs_value result = fn_ebv(v);
            /* its EBV is not true, so we skip to the next one */
            if (result.valid & fs_valid_bit(FS_V_TYPE_ERROR) || !result.in) {
                continue;
            }
            /* copy the bound columns of this row into the result */
            for (int col=0; b[col].name; col++) {
                if (b[col].bound) {
                    fs_rid_vector_append(ret[col].vals, b[col].vals->data[row]);
                }
            }
        }
    }
    q->bt = restore;

    return ret;
}
Example #12
0
/* Removes rows from bi that compare equal to the row directly before them.
 *
 * The table is duplicated with fs_binding_copy_and_clear(), then bi is
 * refilled from that duplicate, keeping row 0 and every later row that
 * differs from its predecessor. Column 0 of bi is left empty. */
void fs_binding_uniq(fs_binding *bi)
{
    /* zero or one row cannot contain duplicates; the code below also
     * relies on row 0 existing */
    if (fs_binding_length(bi) < 2) {
        return;
    }

    fs_binding *src = fs_binding_copy_and_clear(bi);

    bi[0].vals->length = 0;

#ifdef DEBUG_MERGE
    double then = fs_time();
#endif
    const int rows = fs_binding_length(src);
    int outrow = 1;

    /* the first row is always kept; the source is compared on every
     * bound column */
    for (int col = 1; src[col].name; col++) {
        fs_rid_vector_append(bi[col].vals, table_value(src, col, 0));
        bi[col].bound = src[col].bound;
        src[col].sort = src[col].bound;
    }

    for (int row = 1; row < rows; row++) {
        if (binding_row_compare(NULL, src, src, row, row-1, rows, rows) != 0) {
            for (int col = 1; src[col].name; col++) {
                fs_rid_vector_append(bi[col].vals, table_value(src, col, row));
            }
            outrow++;
        }
    }

#ifdef DEBUG_MERGE
    double now = fs_time();
    printf("uniq took %fs (%d->%d rows)\n", now-then, rows, outrow);
    fs_binding_print(bi, stdout);
#endif
    fs_binding_free(src);
}
Example #13
0
/* Prints a summary of an mhash file to out and reports inconsistencies.
 *
 * The header fields are printed first, then the entries after the header
 * are scanned from the file. With verbosity > 0 every entry with a
 * non-zero value is listed. Errors are reported when a model appears in
 * consecutive used entries, when the scanned count differs from the header
 * count, and when sorting and de-duplicating the models shrinks the list. */
void fs_mhash_print(fs_mhash *mh, FILE *out, int verbosity)
{
    if (!mh) {
        fs_error(LOG_CRIT, "tried to print NULL mhash");

        return;
    }
    fs_mhash_entry e;
    fs_rid_vector *models = fs_rid_vector_new(0);
    fs_rid last_model = FS_RID_NULL;
    int entry = 0;
    int count = 0;

    fprintf(out, "mhash %s\n", mh->filename);
    fprintf(out, "  count: %d\n", mh->count);
    fprintf(out, "  size: %d\n", mh->size);
    fprintf(out, "\n");

    lseek(mh->fd, sizeof(struct mhash_header), SEEK_SET);
    while (read(mh->fd, &e, sizeof(e)) == sizeof(e)) {
        if (e.val) {
            count++;
            if (verbosity > 0) {
                fprintf(out, "%8d %016llx %8d\n", entry, e.rid, e.val);
            }
            fs_rid_vector_append(models, e.rid);
            if (e.rid == last_model) {
                fprintf(out, "ERROR: %s model %016llx appears multiple times\n",
                        mh->filename, e.rid);
            }
            last_model = e.rid;
        }
        entry++;
    }

    if (mh->count != count) {
        fprintf(out, "ERROR: %s header count %d != scanned count %d\n",
                mh->filename, mh->count, count);
    }

    /* any shrinkage after sort + uniq means some model occurred twice */
    int oldlength = models->length;
    fs_rid_vector_sort(models);
    fs_rid_vector_uniq(models, 0);
    if (models->length != oldlength) {
        fprintf(out, "ERROR: %s some models appear > 1 time\n",
                mh->filename);
    }

    /* the scratch vector is local to this function; release it */
    fs_rid_vector_free(models);
}
Example #14
0
/* For every column of from that is not flagged as used, appends the value
 * at the given row to the matching column of to, count times.
 * A row at or beyond the end of a column contributes FS_RID_NULL. */
void fs_binding_copy_row_unused(fs_binding *from, int row, int count, fs_binding *to)
{
    for (int col = 0; from[col].name; col++) {
        if (from[col].used) {
            continue;
        }

        const fs_rid val = (row < from[col].vals->length)
                         ? from[col].vals->data[row]
                         : FS_RID_NULL;

        int remaining = count;
        while (remaining-- > 0) {
            fs_rid_vector_append(to[col].vals, val);
        }
    }
}
Example #15
0
END_TEST


START_TEST (check_fs_rid_vector_truncate)
{
  /* Fills a vector with 100 .. 9999 and checks what remains after
   * truncating it to 100 entries and then to a single entry. */
  const int first = 100;
  const int limit = 10000;

  fs_rid_vector *v = fs_rid_vector_new(0);
  fail_unless(v != NULL, "fs_rid_vector is NULL");

  int i = first;
  while (i < limit) {
      fs_rid_vector_append(v, i);
      i++;
  }

  /* the first 100 entries are 100 .. 199 */
  fs_rid_vector_truncate(v, 100);
  fail_if(fs_rid_vector_length(v) != 100);
  fail_if(v->data[fs_rid_vector_length(v)-1] != 199);

  /* only the very first entry survives */
  fs_rid_vector_truncate(v, 1);
  fail_if(v->data[fs_rid_vector_length(v)-1] != 100);

  fs_rid_vector_free(v);
}
Example #16
0
/* Adds val to the binding for var, creating the binding if necessary.
 *
 * If var already has a binding the value is appended to it, the binding is
 * marked bound and projected is OR-ed into its proj and need_val flags.
 * Otherwise the first free slot is claimed: the variable name is
 * duplicated, the column starts with val (or empty and unbound when val is
 * FS_RID_NULL) and the slot index is stored in var->user_data.
 *
 * Returns the binding, or NULL when all FS_BINDING_MAX_VARS slots are in
 * use. */
fs_binding *fs_binding_add(fs_binding *b, rasqal_variable *var, fs_rid val, int projected)
{
#ifdef DEBUG_BINDING
    /* trace only the variable selected by DEBUG_BINDING; the name comes
     * from var because this function has no separate name parameter */
    if (!strcmp(DEBUG_BINDING, (char *)var->name)) printf("@@ add("DEBUG_BINDING", %016llx, %d)\n", val, projected);
#endif
    fs_binding *bv = fs_binding_get(b, var);
    if (bv) {
        fs_rid_vector_append(bv->vals, val);
        bv->bound = 1;
        bv->proj |= projected;
        bv->need_val |= projected;

        return bv;
    }

    /* find the first slot without a name */
    long i;
    for (i=0; i < FS_BINDING_MAX_VARS && b[i].name; i++);

    if (i == FS_BINDING_MAX_VARS) {
        fs_error(LOG_ERR, "variable limit (%d) exceeded",
                FS_BINDING_MAX_VARS);

        return NULL;
    }

    b[i].name = g_strdup((char *)var->name);
    /* a free slot is not expected to own a vector already */
    if (b[i].vals) {
        fs_error(LOG_WARNING, "loosing pointer to rid_vector");
    }
    if (val != FS_RID_NULL) {
        b[i].vals = fs_rid_vector_new_from_args(1, val);
        b[i].bound = 1;
    } else {
        b[i].vals = fs_rid_vector_new(0);
    }
    b[i].proj = projected;
    b[i].need_val = projected;
    /* the slot index travels with the variable as an integer in a pointer */
    var->user_data = (void *)i;

    return b+i;
}
Example #17
0
END_TEST


START_TEST (check_fs_rid_vector_grow)
{
  /* Grows a partly filled vector and checks that the enlarged storage is
   * reported by both the length and the size, and can be written to. */
  const int first = 100;
  const int limit = 10000;
  const int grown = 20000;

  fs_rid_vector *v = fs_rid_vector_new(0);
  fail_unless(v != NULL, "fs_rid_vector is NULL");

  int i;
  for (i = first; i < limit; i++) {
      fs_rid_vector_append(v, i);
  }
  fail_if(fs_rid_vector_length(v) != (limit - first));

  fs_rid_vector_grow(v, grown);
  fail_if(fs_rid_vector_length(v) != grown);

  /* every slot up to the new length must be writable */
  for (i = 0; i < grown; i++) {
      v->data[i] = i;
  }
  fail_if(v->size != grown);

  fs_rid_vector_free(v);
}
Example #18
0
/* Appends all entries of v2 to v.
 *
 * NULL arguments and an empty v2 are no-ops. When v2 has more than four
 * entries and v lacks room, v's buffer is grown in one step by
 * max(v2->length, 32) entries. If the entries then fit they are copied in
 * bulk; otherwise (small v2 without room, or a failed reallocation) they
 * are appended one at a time through fs_rid_vector_append(). */
void fs_rid_vector_append_vector(fs_rid_vector *v, fs_rid_vector *v2)
{
    if (!v) return;
    if (!v2) return;

    /* captured up front so that appending a vector to itself terminates */
    const int count = v2->length;

    /* nothing to copy; also keeps memcpy away from a possibly NULL buffer */
    if (count <= 0) return;

    if (count > 4 && v->size - v->length < count) {
        const int new_size = v->size + (count > 32 ? count : 32);
        fs_rid *grown = realloc(v->data, sizeof(fs_rid) * new_size);

        /* on failure the old buffer is still valid, so v is left as it
         * was and the element-wise path below takes over */
        if (grown) {
            v->data = grown;
            v->size = new_size;
        }
    }

    if (v->size - v->length >= count) {
        memcpy(&v->data[v->length], v2->data, sizeof(fs_rid) * count);
        v->length += count;

        return;
    }

    for (int j=0; j<count; j++) {
        fs_rid_vector_append(v, v2->data[j]);
    }
}
Example #19
0
/* Returns a new vector with the rid of every mhash entry whose value is
 * non-zero.
 *
 * A shared lock is taken around the scan unless mh is already marked as
 * locked. Returns NULL for a NULL mhash. If positioning the file after the
 * header fails, the error is logged and an empty vector is returned rather
 * than reading entries from an unknown offset. */
fs_rid_vector *fs_mhash_get_keys(fs_mhash *mh)
{
    if (!mh) {
        fs_error(LOG_CRIT, "tried to get keys from NULL mhash");

        return NULL;
    }
    fs_rid_vector *v = fs_rid_vector_new(0);

    fs_mhash_entry e;

    if (!mh->locked) flock(mh->fd, LOCK_SH);
    if (lseek(mh->fd, sizeof(struct mhash_header), SEEK_SET) == -1) {
        fs_error(LOG_ERR, "seek error on mhash: %s", strerror(errno));
    } else {
        /* a short read ends the scan */
        while (read(mh->fd, &e, sizeof(e)) == sizeof(e)) {
            if (e.val) fs_rid_vector_append(v, e.rid);
        }
    }
    if (!mh->locked) flock(mh->fd, LOCK_UN);

    return v;
}
Example #20
0
/* Reads whitespace separated hexadecimal rids from a text file.
 *
 * Returns a new vector with the rids in file order. If the file cannot be
 * opened the error is logged and the vector is returned empty. Tokens are
 * read at most 20 characters at a time. */
static fs_rid_vector *rid_file(char *filename)
{
  fs_rid_vector *rids = fs_rid_vector_new(0);
  FILE *fp = fopen(filename, "r");

  if (fp == NULL) {
    fs_error(LOG_ERR, "could not open “%s”: %s", filename, strerror(errno));
    return rids;
  }

  char token[21];
  for (;;) {
    if (feof(fp) || ferror(fp)) break;
    if (fscanf(fp, "%20s", token) < 1) break;

    const fs_rid rid = strtoull(token, NULL, 16);
    fs_rid_vector_append(rids, rid);
  }

  fclose(fp);

  return rids;
}
Example #21
0
int fs_clear(struct update_context *uc, char *graphuri)
{
    fs_rid_vector *mvec = fs_rid_vector_new(0);
    fs_rid mrid;
    if (graphuri) {
        mrid = fs_hash_uri(graphuri);
    } else {
        graphuri = FS_DEFAULT_GRAPH;
        mrid = fs_c.default_graph;
    }
    fs_rid_vector_append(mvec, mrid);

    int errors = 0;
    if (fsp_delete_model_all(uc->link, mvec)) {
        errors++;
        add_message(uc, g_strdup_printf("Error while trying to delete %s", graphuri), 1);
    } else {
        add_message(uc, g_strdup_printf("Deleted <%s>", graphuri), 1);
    }
    fs_rid_vector_free(mvec);

    return errors;
}
Example #22
0
/* Copies graph <from> over graph <to>; a NULL name means the default graph.
 *
 * Nothing happens when both names hash to the same graph. Otherwise <from>
 * is probed for data, its triples are fetched, bnodes are remapped, <to> is
 * deleted and the triples are inserted into it. Progress and errors are
 * recorded with add_message().
 *
 * Returns 0 on success (including the same-graph case) and 1 when <from>
 * is empty, its contents cannot be fetched, or deleting <to> fails. */
int fs_copy(struct update_context *uc, char *from, char *to)
{
    fs_rid_vector *mvec = fs_rid_vector_new(0);
    fs_rid_vector *empty = fs_rid_vector_new(0);

    fs_rid fromrid, torid;
    if (from) {
        fromrid = fs_hash_uri(from);
    } else {
        from = FS_DEFAULT_GRAPH;
        fromrid = fs_c.default_graph;
    }
    if (to) {
        torid = fs_hash_uri(to);
    } else {
        to = FS_DEFAULT_GRAPH;
        torid = fs_c.default_graph;
    }

    if (fromrid == torid) {
        /*don't need to do anything */
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1);
        add_message(uc, "0 triples added, 0 removed", 0);

        return 0;
    }

    fs_rid_vector_append(mvec, fromrid);

    /* search for all the triples in from */
    fs_rid_vector **results = NULL;
    fs_rid_vector *slot[4] = { mvec, empty, empty, empty };

    /* see if there's any data in <from> */
    fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT,
             slot, &results, -1, 1);
    if (!results || results[0]->length == 0) {
        if (results) {
            fs_rid_vector_free(results[0]);
            free(results);
        }
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("<%s> is empty, not copying", from), 1);

        return 1;
    }

    fs_rid_vector_free(results[0]);
    free(results);
    /* reset so a bind that produces nothing cannot leave a stale pointer */
    results = NULL;

    /* get the contents of <from> */
    fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT,
             slot, &results, -1, -1);
    if (!results) {
        /* the probe above shows the bind can yield no result set */
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("Error while trying to read <%s>", from), 1);

        return 1;
    }

    /* map old bnodes to new ones */
    map_bnodes(uc, results[0]);
    map_bnodes(uc, results[1]);
    map_bnodes(uc, results[2]);

    /* delete <to> */
    mvec->data[0] = torid;
    if (fsp_delete_model_all(uc->link, mvec)) {
        /* the fetched triples are not going to be inserted; release them */
        for (int i=0; i<3; i++) {
            fs_rid_vector_free(results[i]);
        }
        free(results);
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("Error while trying to delete %s", to), 1);

        return 1;
    }

    fs_rid_vector_free(mvec);
    fs_rid_vector_free(empty);

    /* insert <to> */
    fs_resource tores;
    tores.lex = to;
    tores.attr= FS_RID_NULL;
    tores.rid = torid;
    fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores);

    insert_triples(uc, torid, results[0], results[1], results[2]);

    add_message(uc, g_strdup_printf("Copied <%s> to <%s>", from, to), 1);
    add_message(uc, g_strdup_printf("%d triples added, ?? removed", results[0]->length), 1);

    for (int i=0; i<3; i++) {
        fs_rid_vector_free(results[i]);
    }
    free(results);

    return 0;
}
Example #23
0
/* Adds the triples of graph <from> to graph <to>; a NULL name means the
 * default graph.
 *
 * Nothing happens when both names hash to the same graph or when <from>
 * holds no triples. Otherwise the triples of <from> are fetched, bnodes
 * are remapped and the triples are inserted into <to>. Progress is
 * recorded with add_message(). Always returns 0. */
int fs_add(struct update_context *uc, char *from, char *to)
{
    fs_rid_vector *mvec = fs_rid_vector_new(0);
    fs_rid_vector *empty = fs_rid_vector_new(0);

    fs_rid fromrid, torid;
    if (from) {
        fromrid = fs_hash_uri(from);
    } else {
        from = FS_DEFAULT_GRAPH;
        fromrid = fs_c.default_graph;
    }
    if (to) {
        torid = fs_hash_uri(to);
    } else {
        to = FS_DEFAULT_GRAPH;
        torid = fs_c.default_graph;
    }

    if (fromrid == torid) {
        /*don't need to do anything */
        /* the scratch vectors are unused on this path; release them */
        fs_rid_vector_free(mvec);
        fs_rid_vector_free(empty);
        add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1);
        add_message(uc, "0 triples added, 0 removed", 0);

        return 0;
    }

    fs_rid_vector_append(mvec, fromrid);

    int errors = 0;

    /* search for all the triples in from */
    fs_rid_vector **results = NULL;
    fs_rid_vector *slot[4] = { mvec, empty, empty, empty };
    fs_bind_cache_wrapper(uc->qs, NULL, 1, FS_BIND_BY_SUBJECT | FS_BIND_SUBJECT | FS_BIND_PREDICATE | FS_BIND_OBJECT,
             slot, &results, -1, -1);
    fs_rid_vector_free(mvec);
    fs_rid_vector_free(empty);

    if (!results || results[0]->length == 0) {
        /* there's nothing to add */
        if (results) {
            for (int i=0; i<3; i++) {
                fs_rid_vector_free(results[i]);
            }
            free(results);
        }
        add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1);
        add_message(uc, "0 triples added, 0 removed", 0);

        return 0;
    }

    /* map old bnodes to new ones */
    map_bnodes(uc, results[0]);
    map_bnodes(uc, results[1]);
    map_bnodes(uc, results[2]);

    /* make sure the target graph's own resource exists */
    fs_resource tores;
    tores.lex = to;
    tores.attr= FS_RID_NULL;
    tores.rid = torid;
    fsp_res_import(uc->link, FS_RID_SEGMENT(torid, uc->segments), 1, &tores);

    insert_triples(uc, torid, results[0], results[1], results[2]);

    add_message(uc, g_strdup_printf("Added <%s> to <%s>", from, to), 1);
    add_message(uc, g_strdup_printf("%d triples added, 0 removed", results[0]->length), 1);

    for (int i=0; i<3; i++) {
        fs_rid_vector_free(results[i]);
    }
    free(results);

    return errors;
}
Example #24
0
/* Computes a MINUS b and returns the result as a new binding table.
 *
 * Returns NULL when a is NULL and a plain copy of a when b is NULL. When a
 * and b share no bound variable the result is likewise a copy of a.
 * Otherwise both inputs are sorted on their shared bound variables (their
 * sort flags and column 0 are modified in the process) and merged: rows of
 * a that compare as -1 or -2 against the current row of b are kept, and
 * runs of rows that compare equal are dropped. */
fs_binding *fs_binding_minus(fs_query *q, fs_binding *a, fs_binding *b)
{
    if (a == NULL) {
        return NULL;
    }
    if (b == NULL) {
        /* a - 0 = a */
        return fs_binding_copy(a);
    }

    fs_binding *c = fs_binding_copy(a);
    int inter = 0;      /* do the tables intersect */

    for (int i=0; a[i].name; i++) {
        a[i].sort = 0;
        b[i].sort = 0;
        c[i].sort = 0;
    }
    for (int i=1; a[i].name; i++) {
        if (a[i].bound && b[i].bound) {
            inter = 1;
            a[i].sort = 1;
            b[i].sort = 1;
#ifdef DEBUG_MERGE
            printf("joining on %s\n", a[i].name);
#endif
        }
    }

    /* a and b bound variables do not intersect, return c (copy of a).
     * c has not been emptied yet at this point, so it still holds every
     * row of a. */
    if (!inter) {
#ifdef DEBUG_MERGE
        printf("remove nothing, result:\n");
        fs_binding_print(c, stdout);
#endif
        return c;
    }

    /* from here on c is rebuilt row by row from the merge below */
    for (int i=0; a[i].name; i++) {
        c[i].vals->length = 0;
        if (i > 0 && (a[i].bound || b[i].bound)) {
            c[i].bound = 1;
        }
    }

    int length_a = fs_binding_length(a);
    int length_b = fs_binding_length(b);

    /* sort the two sets of bindings so they can be merged linearly */
    fs_binding_sort(a);
    fs_binding_sort(b);

#ifdef DEBUG_MERGE
    printf("a: %d bindings\n", fs_binding_length(a));
    fs_binding_print(a, stdout);
    printf("b: %d bindings\n", fs_binding_length(b));
    fs_binding_print(b, stdout);
#endif

    int apos = 0;
    int bpos = 0;
    int cmp;
    while (apos < length_a) {
        cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b);
        if (cmp == -1 || cmp == -2) {
            /* A and B aren't compatible, keep A row */
            for (int col=0; a[col].name; col++) {
                if (!c[col].need_val) {
                    continue;
                } else if (a[col].bound) {
                    fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                } else {
                    fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                }
            }
            apos++;
        } else if (cmp == 0) {
            /* Both rows are equal (cmp == 0), skip A row in result */
#if DEBUG_MERGE > 1
            printf("[I] Ar=%d, Br=%d", apos, bpos);
#endif
            /* step over the whole run of equal rows on both sides */
            int range_a = apos+1;
            int range_b = bpos+1;
            while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++;
            while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++;
            apos = range_a;
            bpos = range_b;
        } else if (cmp == +1 || cmp == +2) {
            /* A and B aren't compatible, B sorts lower, skip B or
               B row is NULL */
            bpos++;
        } else {
            fs_error(LOG_ERR, "cmp=%d, value out of range", cmp);
            /* neither position advances on this path, so stop merging
             * instead of spinning forever */
            break;
        }
    }

    /* clear the _ord columns */
    a[0].vals->length = 0;
    b[0].vals->length = 0;

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(c));
    fs_binding_print(c, stdout);
#endif

    return c;
}
Example #25
0
/* Joins binding tables a and b and returns the result as a new table.
 *
 * A NULL input yields a copy of the other input. When a and b share no
 * bound variable, each result column receives a's values followed by b's
 * values, with FS_RID_NULL padding for a side on which the column is
 * unbound. Otherwise both inputs are sorted on their shared bound
 * variables (their sort flags and column 0 are modified in the process),
 * optionally truncated to q->soft_limit in restricted mode, and merged.
 * With join == FS_LEFT, rows of a without a partner in b are kept. */
fs_binding *fs_binding_join(fs_query *q, fs_binding *a, fs_binding *b, fs_join_type join)
{
    if (a == NULL) {
        return fs_binding_copy(b);
    }
    if (b == NULL) {
        return fs_binding_copy(a);
    }

    fs_binding *c = fs_binding_copy(a);
    int inter = 0;      /* do the tables intersect */

    for (int i=0; a[i].name; i++) {
        a[i].sort = 0;
        b[i].sort = 0;
        c[i].sort = 0;
        c[i].vals->length = 0;
    }
    for (int i=1; a[i].name; i++) {
        if (a[i].bound || b[i].bound) {
            c[i].bound = 1;
        }

        if (a[i].bound && b[i].bound) {
            inter = 1;
            a[i].sort = 1;
            b[i].sort = 1;
#ifdef DEBUG_MERGE
            printf("joining on %s\n", a[i].name);
#endif
        }
    }

    /* a and b bound variables do not intersect, we can just dump results */
    if (!inter) {
        int length_a = fs_binding_length(a);
        int length_b = fs_binding_length(b);
        for (int i=1; a[i].name; i++) {
            if (!a[i].bound) {
                for (int j=0; j<length_a; j++) {
                    fs_rid_vector_append(c[i].vals, FS_RID_NULL);
                }
            } else {
                fs_rid_vector_append_vector(c[i].vals, a[i].vals);
            }
            if (!b[i].bound) {
                for (int j=0; j<length_b; j++) {
                    fs_rid_vector_append(c[i].vals, FS_RID_NULL);
                }
            } else {
                fs_rid_vector_append_vector(c[i].vals, b[i].vals);
            }
        }
#ifdef DEBUG_MERGE
        printf("append all, result:\n");
        fs_binding_print(c, stdout);
#endif
        return c;
    }

    int length_a = fs_binding_length(a);
    int length_b = fs_binding_length(b);

    /* sort the two sets of bindings so they can be merged linearly */
    fs_binding_sort(a);
    fs_binding_sort(b);

#ifdef DEBUG_MERGE
    printf("a: %d bindings\n", fs_binding_length(a));
    fs_binding_print(a, stdout);
    printf("b: %d bindings\n", fs_binding_length(b));
    fs_binding_print(b, stdout);
#endif

    /* If were running in restricted mode, truncate the binding tables */
    if (q->flags & FS_QUERY_RESTRICTED) {
        int restricted = 0;
        fs_binding_truncate(a, q->soft_limit);
        if (length_a > fs_binding_length(a)) {
            length_a = fs_binding_length(a);
            restricted = 1;
        }
        fs_binding_truncate(b, q->soft_limit);
        if (length_b > fs_binding_length(b)) {
            length_b = fs_binding_length(b);
            restricted = 1;
        }
        if (restricted) {
            char *msg = "some results have been dropped to prevent overunning effort allocation";
            q->warnings = g_slist_prepend(q->warnings, msg);
        }
    }

    int apos = 0;
    int bpos = 0;
    int cmp;
    while (apos < length_a) {
        /* an inner join is finished once b is exhausted */
        if (join == FS_INNER && bpos >= length_b) break;
        cmp = binding_row_compare(q, a, b, apos, bpos, length_a, length_b);
        if (cmp == -1) {
            /* A and B aren't compatible, A sorts lower, skip A or left join */
#if DEBUG_MERGE > 1
            printf("[L] Ar=%d, Br=%d", apos, bpos);
#endif
            if (join == FS_LEFT) {
                for (int col=0; a[col].name; col++) {
                    if (!c[col].need_val) {
                        continue;
                    } else if (a[col].bound) {
#if DEBUG_MERGE > 1
                        printf(" %s=%016llx", c[col].name, table_value(a, col, apos));
#endif
                        fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                    } else {
#if DEBUG_MERGE > 1
                        printf(" %s=null", c[col].name);
#endif
                        fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                    }
                }
            }
            apos++;
        } else if (cmp == 0 || cmp == -2 || cmp == 2) {
            /* Both rows are equal (cmp == 0), or one row is null (cmp == -2, 2) */
            /* Both rows match, find out what combinations bind and produce them */
#if DEBUG_MERGE > 1
            printf("[I] Ar=%d, Br=%d", apos, bpos);
#endif
            /* find the end of the run of equal rows on each side */
            int range_a = apos+1;
            int range_b = bpos+1;
            while (binding_row_compare(q, a, a, apos, range_a, length_a, length_a) == 0) range_a++;
            while (binding_row_compare(q, b, b, bpos, range_b, length_b, length_b) == 0) range_b++;
            int start_a = apos;
            int start_b = bpos;
            /* emit every pairing of a row from a's run with one from b's */
            for (apos = start_a; apos<range_a; apos++) {
                for (bpos = start_b; bpos<range_b; bpos++) {
                    for (int col=0; a[col].name; col++) {
                        if (!c[col].need_val) {
                            continue;
                        } else if (!a[col].bound && !b[col].bound) {
#if DEBUG_MERGE > 1
                            printf(" %s=null", c[col].name);
#endif
                            fs_rid_vector_append(c[col].vals, FS_RID_NULL);
                        } else if (a[col].bound) {
                            /* if were left joining and A is NULL, we want the
                             * value from B */
                            if (join == FS_LEFT && table_value(a, col, apos) == FS_RID_NULL && b[col].bound) {
#if DEBUG_MERGE > 1
                                printf(" %s=%016llx", c[col].name, table_value(b, col, bpos));
#endif
                                fs_rid_vector_append(c[col].vals, table_value(b, col, bpos));
                            } else {
#if DEBUG_MERGE > 1
                                printf(" %s=%016llx", c[col].name, table_value(a, col, apos));
#endif
                                fs_rid_vector_append(c[col].vals, table_value(a, col, apos));
                            }
                        } else {
#if DEBUG_MERGE > 1
                            printf(" %s=%016llx", c[col].name, table_value(b, col, bpos));
#endif
                            fs_rid_vector_append(c[col].vals, table_value(b, col, bpos));
                        }
                    }
                }
            }
            /* this is actually unnecessary because the for loop will do the
             * same thing, but it's clearer */
            apos = range_a;
            bpos = range_b;
        } else if (cmp == +1) {
            /* A and B aren't compatible, B sorts lower, skip B */
            bpos++;
        } else {
            fs_error(LOG_ERR, "cmp=%d, value out of range", cmp);
            /* neither position advances on this path, so stop merging
             * instead of spinning forever */
            break;
        }
#if DEBUG_MERGE > 1
        printf("\n");
#endif
    }

    /* clear the _ord columns */
    a[0].vals->length = 0;
    b[0].vals->length = 0;

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(c));
    fs_binding_print(c, stdout);
#endif

    return c;
}
Example #26
0
File: dump.c Project: rafl/4store
/* Returns the lexical form for rid.
 *
 * A matching entry in the resource cache is used when present; otherwise
 * the rid is resolved through fsp_resolve() on its segment. The cache is
 * not updated here. */
xmlChar *get_uri(fsp_link *link, fs_rid rid)
{
  const fs_rid slot = rid & CACHE_MASK;

  if (cache[slot].rid == rid) {
    return (xmlChar *) cache[slot].lex;
  }

  /* a one-element vector on the stack that points straight at rid */
  fs_rid_vector single = { .length = 1, .size = 1, .data = &rid };
  fs_resource found;

  fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &single, &found);

  return (xmlChar *) found.lex;
}

/* Returns the lexical form for an attribute rid.
 *
 * A matching entry in the attribute cache is used when present; otherwise
 * the rid is resolved through fsp_resolve() on its segment and the result
 * is stored in the attribute cache.
 *
 * The lookup and the store use the same ATTR_CACHE_MASK slot, so a stored
 * entry is found again and the lookup index is one the store also uses. */
xmlChar *get_attr(fsp_link *link, fs_rid rid)
{
  if (attr_cache[rid & ATTR_CACHE_MASK].rid == rid) {
    return (xmlChar *) attr_cache[rid & ATTR_CACHE_MASK].lex;
  }

  /* a one-element vector on the stack that points straight at rid */
  fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid };
  fs_resource resource;

  fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource);
  memcpy(&attr_cache[rid & ATTR_CACHE_MASK], &resource, sizeof(fs_resource));

  return (xmlChar *) resource.lex;
}

/* Returns the lexical form for a literal rid and stores its attribute rid
 * in *attr.
 *
 * A matching entry in the resource cache is used when present; otherwise
 * the rid is resolved through fsp_resolve() on its segment. The cache is
 * not updated here. */
xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr)
{
  const fs_rid slot = rid & CACHE_MASK;

  if (cache[slot].rid == rid) {
    *attr = cache[slot].attr;
    return (xmlChar *) cache[slot].lex;
  }

  /* a one-element vector on the stack that points straight at rid */
  fs_rid_vector single = { .length = 1, .size = 1, .data = &rid };
  fs_resource found;

  fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &single, &found);
  *attr = found.attr;

  return (xmlChar *) found.lex;
}

/* Bulk-resolve the rids of a batch of results into cache[].
 *
 * rids[0..2] are scanned (the row count is taken from rids[0]). Every rid
 * that is not a bnode and not already in its cache slot is queued on the
 * vector of the segment that owns it, and the slot is tagged with the rid
 * straight away so the same rid is not queued twice. All queues are then
 * resolved in one fsp_resolve_all() call and each resolved resource replaces
 * the content of its cache slot (the previous string in the slot is freed). */
void resolve_triples(fsp_link *link, fs_rid_vector **rids)
{
  const int rows = rids[0]->length;
  fs_rid_vector *pending[segments];
  fs_segment seg;

  for (seg = 0; seg < segments; ++seg) {
    pending[seg] = fs_rid_vector_new(0);
  }

  /* collect what still needs resolving, grouped by owning segment */
  for (int col = 0; col < 3; ++col) {
    for (int row = 0; row < rows; ++row) {
      const fs_rid rid = rids[col]->data[row];

      if (FS_IS_BNODE(rid)) continue;
      if (cache[rid & CACHE_MASK].rid == rid) continue;

      fs_rid_vector_append(pending[FS_RID_SEGMENT(rid, segments)], rid);
      cache[rid & CACHE_MASK].rid = rid; /* claimed now, filled in below */
    }
  }

  /* one output array per segment, sized to that segment's queue */
  int count[segments];
  fs_resource *resolved[segments];
  for (seg = 0; seg < segments; ++seg) {
    count[seg] = pending[seg]->length;
    resolved[seg] = calloc(count[seg], sizeof(fs_resource));
  }

  fsp_resolve_all(link, pending, resolved);

  /* move the answers into the cache and drop the scratch storage */
  for (seg = 0; seg < segments; ++seg) {
    for (int k = 0; k < count[seg]; ++k) {
      fs_resource *item = &resolved[seg][k];

      free(cache[item->rid & CACHE_MASK].lex);
      memcpy(&cache[item->rid & CACHE_MASK], item, sizeof(fs_resource));
    }

    fs_rid_vector_free(pending[seg]);
    free(resolved[seg]);
  }
}

/* Write all quads of one model to the XML writer as <triple> elements.
 *
 * link  - connection used for binding and resolving
 * model - rid of the model (graph) to dump
 * xml   - text writer receiving the output
 *
 * Results are pulled in batches: fsp_bind_first_all() starts the scan and
 * fsp_bind_next_all() continues it, both with QUAD_LIMIT as the limit
 * argument. Each batch is pre-resolved with resolve_triples() and then the
 * first three result columns of every row are written as <id> (bnode),
 * <uri>, <plainLiteral> or <typedLiteral>. The scan stops on a NULL result
 * set or an empty batch. Time spent in each phase is accumulated in the
 * file-level time_* counters. */
void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml)
{
  fs_rid_vector none = { .length = 0, .size = 0, .data = 0 };
  fs_rid_vector one = { .length = 1, .size = 1, .data = &model };

  fs_rid_vector **results;

  double then; /* for time keeping */

  then = fs_time();
  fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT);
  time_bind_first += (fs_time() - then);

  while (results != NULL) {

    long length = results[0]->length;

    if (length == 0) {
      /* an empty batch ends the scan, but it still has to be released;
       * breaking without freeing it would leak the last result set */
      fs_rid_vector_free(results[0]);
      fs_rid_vector_free(results[1]);
      fs_rid_vector_free(results[2]);
      free(results);
      break;
    }

    then = fs_time();
    resolve_triples(link, results);
    time_resolving += (fs_time() - then);

    then = fs_time();
    for (int k = 0; k < length; ++k) {
      xmlTextWriterStartElement(xml, (xmlChar *) "triple");

      for (int r = 0; r < 3; ++r) {
        fs_rid rid = results[r]->data[k];
        if (FS_IS_BNODE(rid)) {
          unsigned long long node = FS_BNODE_NUM(rid);
          xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node);
        } else if (FS_IS_URI(rid)) {
          xmlChar *uri = get_uri(link, rid);
          xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri);
        } else if (FS_IS_LITERAL(rid)) {
          fs_rid attr;
          xmlChar *lex = get_literal(link, rid, &attr);
          if (attr == fs_c.empty) {
            /* no attribute: plain literal without language */
            xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex);
          } else if (FS_IS_URI(attr)) {
            /* URI attribute: written as the datatype */
            xmlChar *type = get_uri(link, attr);
            xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral");
            xmlTextWriterWriteString(xml, (xmlChar *) lex);
            xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type);
            xmlTextWriterEndElement(xml);
          } else if (FS_IS_LITERAL(attr)) {
            /* literal attribute: written as xml:lang */
            xmlChar *lang = get_attr(link, attr);
            xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral");
            xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang);
            xmlTextWriterWriteString(xml, (xmlChar *) lex);
            xmlTextWriterEndElement(xml);
          }
        }
      }
      xmlTextWriterEndElement(xml);
      xmlTextWriterWriteString(xml, (xmlChar *) "\n");

    }
    time_write_out += (fs_time() - then);

    fs_rid_vector_free(results[0]);
    fs_rid_vector_free(results[1]);
    fs_rid_vector_free(results[2]);
    free(results);

    then = fs_time();
    fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT);
    time_bind_next += (fs_time() - then);
  }

  fsp_bind_done_all(link);
}

/* Write every model in the store as a <graph> element.
 *
 * The model rids are obtained with a distinct model bind, then sorted and
 * de-duplicated (nulls removed). For each model a <graph> is opened, its
 * <uri> is written when the rid is a URI (otherwise a warning is logged),
 * and dump_model() emits its triples. After each model a progress line with
 * the accumulated phase timings is printed to stdout.
 *
 * The model's URI string is only looked up when it is actually going to be
 * written, so a non-URI model no longer costs a pointless resolve. */
void dump_trix(fsp_link *link, xmlTextWriterPtr xml)
{
  fs_rid_vector **models;
  fs_rid_vector none = { .length = 0, .size = 0, .data = 0 };

  fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models);

  fs_rid_vector_sort(models[0]);
  fs_rid_vector_uniq(models[0], 1);

  long length = models[0]->length;

  for (int k = 0; k < length; ++k) {
    fs_rid model = models[0]->data[k];

    xmlTextWriterStartElement(xml, (xmlChar *) "graph");
    if (FS_IS_URI(model)) {
      xmlChar *model_uri = get_uri(link, model);
      xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri);
    } else {
      fs_error(LOG_WARNING, "model %lld is not a URI", model);
    }

    dump_model(link, model, xml);
    xmlTextWriterEndElement(xml);
    xmlTextWriterWriteString(xml, (xmlChar *) "\n");
    /* progress: models done / total, then cumulative seconds per phase */
    printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out);
  }
}

/* Dump the whole store into `filename` as a TriX document.
 *
 * Opens an XML text writer on the file, emits the document prologue and the
 * <TriX> root, lets dump_trix() write the graphs, then finishes the document
 * and releases the writer. If the writer cannot be created the error is
 * logged and the process exits with status 4. */
void dump_file(fsp_link *link, char *filename)
{
  xmlTextWriterPtr writer = xmlNewTextWriterFilename(filename, TRUE);

  if (writer == NULL) {
    fs_error(LOG_ERR, "Couldn't write output file, giving up");
    exit(4);
  }

  xmlTextWriterStartDocument(writer, NULL, NULL, NULL);
  xmlTextWriterStartElement(writer, (xmlChar *) "TriX");

  dump_trix(link, writer);

  /* ending the document also closes the still-open TriX element */
  xmlTextWriterEndDocument(writer);
  xmlFreeTextWriter(writer);
}

/* Entry point of the dump tool: <kbname> <uri>.
 *
 * After fsp_argv_password() has processed the argument list, exactly two
 * arguments must remain: the knowledge base name and the output location.
 * Exit status: 1 on bad usage, 2 when the link cannot be opened, otherwise
 * 0 (or whatever dump_file() exits with on failure). */
int main(int argc, char *argv[])
{
  char *password = fsp_argv_password(&argc, argv);

  if (argc != 3) {
    fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER);
    fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]);
    exit(1);
  }

  char *kb_name = argv[1];
  char *target = argv[2];

  fsp_link *link = fsp_open_link(kb_name, password, FS_OPEN_HINT_RO);
  if (link == NULL) {
    fs_error (LOG_ERR, "couldn't connect to “%s”", argv[1]);
    exit(2);
  }

  /* hashing and the segment count both depend on the opened store */
  fs_hash_init(fsp_hash_type(link));
  segments = fsp_link_segments(link);

  dump_file(link, target);

  fsp_close_link(link);

  return 0;
}
Example #27
0
/* Entry point of the import tool: <kbname> <rdf file/URI> ...
 *
 * Parses the options, pairs every input file/URI with a target model URI,
 * opens a read-write link to the knowledge base, optionally deletes the
 * target models, imports each input, commits, and prints timing statistics
 * depending on verbosity.
 *
 * Exit/return status as visible here: 0 on success and for --help/--version,
 * 1 for usage errors or a failed model delete, 2 when the link cannot be
 * opened, 3 when the import cannot be started.
 * NOTE(review): the result of fs_import_commit() only gates the summary
 * line; the function still returns 0 when the commit reports failure. */
int main(int argc, char *argv[])
{
    int verbosity = 0;
    int dryrun = 0;
    char *password = NULL;
    char *format = "auto";
    FILE *msg = stderr;
    /* short options; -R, -Q, -h and -V are not listed here and are only
     * reachable through their long forms below */
    char *optstring = "am:M:vnf:";
    int c, opt_index = 0, help = 0;
    int files = 0, adding = 0;
    char *kb_name = NULL;
    /* at most argc inputs are possible, so argc slots are always enough */
    char *model[argc], *uri[argc];
    char *model_default = NULL;

    password = fsp_argv_password(&argc, argv);

    static struct option long_options[] = {
        { "add", 0, 0, 'a' },
        { "model", 1, 0, 'm' },
        { "model-default", 1, 0, 'M' },
        { "verbose", 0, 0, 'v' },
        { "dryrun", 0, 0, 'n' },
        { "no-resources", 0, 0, 'R' },
        { "no-quads", 0, 0, 'Q' },
        { "format", 1, 0, 'f' },
        { "help", 0, 0, 'h' },
        { "version", 0, 0, 'V' },
        { 0, 0, 0, 0 }
    };

    /* NULL marks "no explicit -m given for this input" */
    for (int i= 0; i < argc; ++i) {
      model[i] = NULL;
    }

    /* exit status used when the usage text is printed; only an explicit
     * --help turns it into success */
    int help_return = 1;

    while ((c = getopt_long (argc, argv, optstring, long_options, &opt_index)) != -1) {
        if (c == 'm') {
            /* the n-th -m applies to the n-th input file */
	    model[files++] = optarg;
        } else if (c == 'M') {
            model_default = optarg;
        } else if (c == 'v') {
	    verbosity++;
        } else if (c == 'a') {
	    adding = 1;
        } else if (c == 'n') {
	    dryrun |= FS_DRYRUN_DELETE | FS_DRYRUN_RESOURCES | FS_DRYRUN_QUADS;
	} else if (c == 'R') {
	    dryrun |= FS_DRYRUN_RESOURCES;
	} else if (c == 'Q') {
	    dryrun |= FS_DRYRUN_QUADS;
        } else if (c == 'f') {
            format = optarg;
        } else if (c == 'h') {
	    help = 1;
            help_return = 0;
        } else if (c == 'V') {
            printf("%s, built for 4store %s\n", argv[0], GIT_REV);
            exit(0);
        } else {
            /* unknown option: show usage and fail */
	    help = 1;
        }
    }

    if (verbosity > 0) {
	if (dryrun & FS_DRYRUN_DELETE) {
	    printf("warning: not deleting old model\n");
	}
	if (dryrun & FS_DRYRUN_RESOURCES) {
	    printf("warning: not importing resource nodes\n");
	}
	if (dryrun & FS_DRYRUN_QUADS) {
	    printf("warning: not importing quad graph\n");
	}
    }

    /* restart the count: from here on `files` counts positional inputs,
     * while model[] keeps the -m values collected above */
    files = 0;
    for (int k = optind; k < argc; ++k) {
        if (!kb_name) {
            /* first positional argument is the knowledge base name */
            kb_name = argv[k];
        } else {
            /* anything containing ':' is taken as a URI already, anything
             * else is treated as a filename and converted to a URI string */
	    if (strchr(argv[k], ':')) {
		uri[files] = g_strdup(argv[k]);
	    } else {
		uri[files] = (char *)raptor_uri_filename_to_uri_string(argv[k]);
	    }
            /* model precedence: explicit -m, then -M, then the input URI */
            if (!model[files]) {
                if (!model_default) {
                    model[files] = uri[files];
                } else {
                    model[files] = model_default;
                }
            }
            files++;
        }
    }

    /* the raptor world is created before the usage check because the usage
     * text lists the parsers it knows */
    raptor_world *rw = raptor_new_world();
    if (help || !kb_name || files == 0) {
        fprintf(stdout, "%s revision %s\n", argv[0], FS_FRONTEND_VER);
        fprintf(stdout, "Usage: %s <kbname> <rdf file/URI> ...\n", argv[0]);
        fprintf(stdout, " -v --verbose   increase verbosity (can repeat)\n");
        fprintf(stdout, " -a --add       add data to models instead of replacing\n");
        fprintf(stdout, " -m --model     specify a model URI for the next RDF file\n");
        fprintf(stdout, " -M --model-default specify a model URI for all RDF files\n");
        fprintf(stdout, " -f --format    specify an RDF syntax for the import\n");
        fprintf(stdout, "\n   available formats are:\n");

        /* enumerate parser descriptions until raptor returns none */
        for (unsigned int i=0; 1; i++) {
            const raptor_syntax_description *desc =
                    raptor_world_get_parser_description(rw, i);
            if (!desc) {
                break;
            }
            fprintf(stdout, "    %12s - %s\n", desc->names[0], desc->label);
        }
        exit(help_return);
    }

    fsp_syslog_enable();

    fsplink = fsp_open_link(kb_name, password, FS_OPEN_HINT_RW);

    if (!fsplink) {
      fs_error (LOG_ERR, "couldn't connect to “%s”", kb_name);
      exit(2);
    }

    /* NOTE(review): features is passed to strstr() unchecked - confirm that
     * fsp_link_features() cannot return NULL */
    const char *features = fsp_link_features(fsplink);
    int has_o_index = !(strstr(features, "no-o-index")); /* tweak */

    fs_hash_init(fsp_hash_type(fsplink));
    const int segments = fsp_link_segments(fsplink);
    int total_triples = 0;

    /* per-segment timings before the import, used as the baseline for the
     * differences printed at the end */
    fs_import_timing timing[segments];

    for (int seg = 0; seg < segments; seg++) {
        fsp_get_import_times(fsplink, seg, &timing[seg]);
    }

    gettimeofday(&then, 0);

    if (fsp_start_import_all(fsplink)) {
	fs_error(LOG_ERR, "aborting import");

	exit(3);
    }

#if 0
printf("press enter\n");
char foo;
read(0, &foo, 1);
#endif

    /* rids of all target models, one per input */
    fs_rid_vector *mvec = fs_rid_vector_new(0);

    for (int f= 0; f < files; ++f) {
        fs_rid muri = fs_hash_uri(model[f]);
        fs_rid_vector_append(mvec, muri);
    }
    if (!adding) {
        /* replace mode: drop the existing content of every target model */
        if (verbosity) {
	    printf("removing old data\n");
	    fflush(stdout);
        }
        if (!(dryrun & FS_DRYRUN_DELETE)) {
	    if (fsp_delete_model_all(fsplink, mvec)) {
	        fs_error(LOG_ERR, "model delete failed");
	        return 1;
	    }
            /* deleting the system config model triggers a config re-read */
	    for (int i=0; i<mvec->length; i++) {
		if (mvec->data[i] == fs_c.system_config) {
		    fs_import_reread_config();
		}
	    }
        }
        /* note: called in replace mode even when the delete was skipped */
        fsp_new_model_all(fsplink, mvec);
    }

    fs_rid_vector_free(mvec);

    gettimeofday(&then_last, 0);
    for (int f = 0; f < files; ++f) {
	if (verbosity) {
            printf("Reading <%s>\n", uri[f]);
            /* only mention the model when it differs from the input URI */
            if (strcmp(uri[f], model[f])) {
                printf("   into <%s>\n", model[f]);
            }
	    fflush(stdout);
        }

        fs_import(fsplink, model[f], uri[f], format, verbosity, dryrun, has_o_index, msg, &total_triples);
	if (verbosity) {
	    fflush(stdout);
        }
    }
    /* sthen is taken before the commit, so the "Index update took" figure
     * below covers the commit as well as fsp_stop_import_all() */
    double sthen = fs_time();
    int ret = fs_import_commit(fsplink, verbosity, dryrun, has_o_index, msg, &total_triples);

    if (verbosity > 0) {
	printf("Updating index\n");
        fflush(stdout);
    }
    fsp_stop_import_all(fsplink);
    if (verbosity > 0) {
        printf("Index update took %f seconds\n", fs_time()-sthen);
    }

    if (!ret) {
        gettimeofday(&now, 0);
        /* wall-clock seconds since just before fsp_start_import_all() */
        double diff = (now.tv_sec - then.tv_sec) +
                        (now.tv_usec - then.tv_usec) * 0.000001;
        if (verbosity && total_triples > 0) {
	    printf("Imported %d triples, average %d triples/s\n", total_triples,
		     (int)((double)total_triples/diff));
            fflush(stdout);
        }
    }

    if (verbosity > 1) {
        /* per-segment time spent during this run: current counters minus
         * the baseline captured in timing[].
         * NOTE(review): the header labels the first column add_q but the
         * field printed is add_s - confirm which is intended */
        printf("seg add_q\tadd_r\t\tcommit_q\tcommit_r\tremove\t\trebuild\t\twrite\n");
        long long *tics = fsp_profile_write(fsplink);

        for (int seg = 0; seg < segments; seg++) {
            fs_import_timing newtimes;
            fsp_get_import_times(fsplink, seg, &newtimes);

	    printf("%2d: %f\t%f\t%f\t%f\t%f\t%f\t%f\n", seg,
                   newtimes.add_s - timing[seg].add_s,
	           newtimes.add_r - timing[seg].add_r,
	           newtimes.commit_q - timing[seg].commit_q,
                   newtimes.commit_r - timing[seg].commit_r,
                   newtimes.remove - timing[seg].remove,
		   newtimes.rebuild - timing[seg].rebuild,
		   tics[seg] * 0.001);
	}
    }

    fsp_close_link(fsplink);
    raptor_free_world(rw);

    return 0;
}
Example #28
0
/* Compute to = from [X] to, in place.
 *
 * This is used to perform joins inside blocks; it saves allocations by doing
 * most operations directly on `to`, unlike fs_binding_join.
 *
 * q     - query; in restricted mode (FS_QUERY_RESTRICTED) both tables are
 *         truncated to q->soft_limit and a warning is prepended to
 *         q->warnings if the merge is cut short
 * block - not referenced in this function
 * from  - source binding table; its sort flags and _ord column are reset,
 *         and in the "append all" path unbound columns are filled with
 *         FS_RID_NULL
 * to    - destination binding table, rewritten with the merged rows
 *
 * Columns bound on both sides are the join columns. Columns bound only in
 * `from` are created in `to` filled with FS_RID_NULL and recorded in
 * rep_list; after the merge, rows in which such a column is still
 * FS_RID_NULL are removed from `to`. */
void fs_binding_merge(fs_query *q, int block, fs_binding *from, fs_binding *to)
{
    fs_binding *inter_f = NULL; /* the intersecting column */
    fs_binding *inter_t = NULL; /* the intersecting column */

    for (int i=0; from[i].name; i++) {
        from[i].sort = 0;
        to[i].sort = 0;
    }
    /* mark every column bound on both sides as a sort/join column; the last
     * one found is remembered in inter_f/inter_t */
    for (int i=1; from[i].name; i++) {
        if (!from[i].bound || !to[i].bound) continue;

        if (from[i].bound && to[i].bound) {
            inter_f = from+i;
            inter_t = to+i;
            from[i].sort = 1;
            to[i].sort = 1;
#ifdef DEBUG_MERGE
    printf("@@ join on %s\n", to[i].name);
#endif
        }
    }

    /* from and to bound variables do not intersect, we can just dump results,
       under some circumstances we need to do a combinatorial explosion */
    if (!inter_f && (fs_binding_length(from) == 0)) {
        const int length_f = fs_binding_length(from);
        const int length_t = fs_binding_length(to);
        for (int i=1; from[i].name; i++) {
            /* pad columns that exist on one side only with FS_RID_NULL so
             * both tables line up before appending */
            if (to[i].bound && !from[i].bound) {
                if (from[i].vals) {
                    fs_rid_vector_free(from[i].vals);
                }
                from[i].vals = fs_rid_vector_new(length_f);
                for (int d=0; d<length_f; d++) {
                    from[i].vals->data[d] = FS_RID_NULL;
                }
                from[i].bound = 1;
            }
            if (!from[i].bound) continue;
            if (!to[i].bound) {
                if (to[i].vals) {
                    fs_rid_vector_free(to[i].vals);
                }
                to[i].vals = fs_rid_vector_new(length_t);
                for (int d=0; d<length_t; d++) {
                    to[i].vals->data[d] = FS_RID_NULL;
                }
            }
            fs_rid_vector_append_vector(to[i].vals, from[i].vals);
            to[i].bound = 1;
        }
#ifdef DEBUG_MERGE
        printf("append all, result:\n");
        fs_binding_print(to, stdout);
#endif

        return;
    }

    /* If we're running in restricted mode, truncate the binding tables */
    if (q->flags & FS_QUERY_RESTRICTED) {
        fs_binding_truncate(from, q->soft_limit);
        fs_binding_truncate(to, q->soft_limit);
    }

    int length_t = fs_binding_length(to);
    int length_f = fs_binding_length(from);
    /* ms8: this list keeps track of the vars to replace */
    GList *rep_list = NULL;
    for (int i=1; to[i].name; i++) {
        if (to+i == inter_t || to[i].used || to[i].bound) {
            /* do nothing */
#if DEBUG_MERGE > 1
    printf("@@ preserve %s\n", to[i].name);
#endif
        } else if (from[i].bound && !to[i].bound) {
#if DEBUG_MERGE > 1
    printf("@@ replace %s\n", from[i].name);
#endif
            /* column only known to `from`: give `to` a NULL-filled column
             * of the right length that the merge below will fill in */
            to[i].bound = 1;
            if (to[i].vals) {
                if (to[i].vals->length != length_t) {
                    fs_rid_vector_free(to[i].vals);
                    to[i].vals = fs_rid_vector_new(length_t);
                }
            } else {
                to[i].vals = fs_rid_vector_new(length_t);
            }
            for (int d=0; d<length_t; d++) {
                to[i].vals->data[d] = FS_RID_NULL;
            }
            rep_list = g_list_append(rep_list, GINT_TO_POINTER(i));
        }
    }

    /* sort the two sets of bindings so they can be merged linearly */
    if (inter_f) {
        fs_binding_sort(from);
        fs_binding_sort(to);
    } else {
        /* make sure the tables are not marked sorted */
        from[0].vals->length = 0;
        to[0].vals->length = 0;
    }

#ifdef DEBUG_MERGE
    printf("old: %d bindings\n", fs_binding_length(from));
    fs_binding_print(from, stdout);
    printf("new: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif

    int fpos = 0;
    int tpos = 0;
    while (fpos < length_f || tpos < length_t) {
        if (q->flags & FS_QUERY_RESTRICTED &&
            fs_binding_length(to) >= q->soft_limit) {
            char *msg = g_strdup("some results have been dropped to prevent overunning time allocation");
            q->warnings = g_slist_prepend(q->warnings, msg);
            break;
        }
        int cmp;
        cmp = binding_row_compare(q, from, to, fpos, tpos, length_f, length_t);
        if (cmp == 0) {
            /* both rows match */
            int fp, tp = tpos;
            /* walk the run of `from` rows equal to the current `to` row and,
             * for each, the run of `to` rows equal to it */
            for (fp = fpos; binding_row_compare(q, from, to, fp, tpos, length_f, length_t) == 0; fp++) {
#if DEBUG_MERGE > 1
if (fp == DEBUG_CUTOFF) {
    printf("...\n");
}
#endif
                for (tp = tpos; 1; tp++) {
                    if (binding_row_compare(q, from, to, fp, tp, length_f, length_t) == 0) {
#if DEBUG_MERGE > 1
if (fp < DEBUG_CUTOFF) {
    printf("STEP %d, %d  ", fp-fpos, tp-tpos);
}
#endif
                        if (fp == fpos) {
                            /* first matching `from` row: overwrite the
                             * existing `to` row in place */
#if DEBUG_MERGE > 1
if (fp < DEBUG_CUTOFF) {
    if (inter_f) {
        printf("REPL %llx\n", inter_f->vals->data[fp]);
    } else {
        printf("REPL ???\n");
    }
}
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && table_value(from, c, fp) == FS_RID_NULL) {
                                    continue;
                                }
                                if (from[c].bound && fp < from[c].vals->length) {
                                    /* when `to` has an _ord column, tp is a
                                     * sorted position and must be mapped to
                                     * the physical row */
                                    long wrow = to[0].vals->length ? to[0].vals->data[tp] : tp;
                                    to[c].vals->data[wrow] = table_value(from, c, fp);
                                    if (to[c].vals->length <= tp) {
                                        to[c].vals->length = tp+1;
                                    }
                                }
                            }
                        } else {
                            /* further matching `from` rows: append a new
                             * row to `to` */
#if DEBUG_MERGE > 1
if (fp < DEBUG_CUTOFF) {
    printf("ADD\n");
}
#endif
                            for (int c=1; to[c].name; c++) {
                                if (!from[c].bound && !to[c].bound) continue;
                                if (from[c].bound && fp < from[c].vals->length) {
                                    fs_rid_vector_append(to[c].vals, table_value(from, c, fp));
                                } else {
                                    fs_rid_vector_append(to[c].vals, table_value(to, c, tp));
                                }
                            }
                        }
                    } else {
                        break;
                    }
                }
            }
            tpos = tp;
            fpos = fp;
        } else if (cmp <= -1) {
            fpos++;
        } else if (cmp >= 1) {
            tpos++;
        } else {
            fs_error(LOG_CRIT, "unknown compare state %d in binding", cmp);
        }
    }

    /* clear the _ord columns */
    from[0].vals->length = 0;
    to[0].vals->length = 0;

    /* ms8: INIT code to clean up rows that were not replaced */
    if (rep_list) {
        /* NOTE(review): this relies on fs_new_bit_array() returning an array
         * with every bit set, so that clearing a bit marks a row for
         * removal - confirm against its implementation */
        unsigned char *to_del = fs_new_bit_array(length_t);
        int to_del_count = 0;
        /* iterate with a cursor so the list head stays available for
         * g_list_free(); advancing rep_list itself left it NULL and leaked
         * the whole list */
        for (GList *it = rep_list; it; it = g_list_next(it)) {
            int col_r = GPOINTER_TO_INT(it->data);
            for (int d=0; d<length_t; d++) {
                if (to[col_r].vals->data[d] == FS_RID_NULL) {
                    fs_bit_array_set(to_del, d, 0);
                    to_del_count++;
                }
            }
        }
        g_list_free(rep_list);
        rep_list = NULL;
        if (to_del_count) {
            int vars = 0;
            for (int i=1; to[i].name; i++)
                vars++;
            /* rebuild every column from the rows whose bit is still set */
            fs_rid_vector **clean = calloc(vars, sizeof(fs_rid_vector *));
            for (int i=0;i<vars;i++)
                clean[i] = fs_rid_vector_new(0);
            for (int d = 0;d<length_t;d++) {
                if (fs_bit_array_get(to_del,d)) {
                    for (int i=0;i<vars;i++) {
                        fs_rid_vector_append(clean[i],to[i+1].vals->data[d]);
                    }
                }
            }
            /* hand the rebuilt storage over to the existing vectors and
             * free only the temporary vector headers */
            for (int i=1;i<=vars;i++) {
                free(to[i].vals->data);
                to[i].vals->data = clean[i-1]->data;
                to[i].vals->length = clean[i-1]->length;
                to[i].vals->size = clean[i-1]->size;
                free(clean[i-1]);
            }
            free(clean);
        }
        fs_bit_array_destroy(to_del);
    }
    /* ms8: END code to clean up rows that were not replaced */

#ifdef DEBUG_MERGE
    printf("result: %d bindings\n", fs_binding_length(to));
    fs_binding_print(to, stdout);
#endif
}