Пример #1
0
END_TEST


/*
 * Exercises fs_rid_vector_uniq() in both of its modes.
 *
 * The vector is filled with every value in [100, 500) twice, followed by a
 * single FS_RID_NULL.  A first pass with the second argument set to 0 must
 * collapse the duplicates but keep the trailing FS_RID_NULL; a second pass
 * with the argument set to 1 must also drop every FS_RID_NULL, including the
 * one planted at index 123.
 */
START_TEST (check_fs_rid_vector_uniq)
{
  fs_rid_vector *v = fs_rid_vector_new(0);
  fail_unless(v != NULL, "fs_rid_vector is NULL");
  int i=0;
  for (i=100; i < 500; i++) {
      fs_rid_vector_append(v, i);
      fs_rid_vector_append(v, i);
  }
  fs_rid_vector_append(v,FS_RID_NULL);
  fail_if(fs_rid_vector_length(v) != (((500 - 100)*2) + 1), "fs_rid_vector_length failed");
  fs_rid_vector_uniq(v, 0); //Not remove nulls
  fail_if(fs_rid_vector_length(v) != ((500 - 100) + 1), "fs_rid_vector_length failed");
  /* The final slot holds FS_RID_NULL and is verified separately below. */
  for (i=1; i < fs_rid_vector_length(v) - 1; i++) {
      /* RIDs are 64 bits wide, so they are printed with %llx like the
         other messages in this test rather than with %d. */
      fail_if(v->data[i] == v->data[i-1], "unique values failed [%llx %llx]",v->data[i],v->data[i-1]);
  }
  fail_if(v->data[fs_rid_vector_length(v)-1] != FS_RID_NULL, "FS_RID_NULL isn't there.");
  /* Plant a second null in the middle so the null-removing pass has two to drop. */
  v->data[123] = FS_RID_NULL;
  fs_rid_vector_uniq(v, 1); //remove nulls
  fail_if(fs_rid_vector_length(v) != ((500 - 100) -1), "fs_rid_vector_length failed");
  fail_if(v->data[fs_rid_vector_length(v)-1] == FS_RID_NULL, "FS_RID_NULL should not be there %llx",
          v->data[fs_rid_vector_length(v)-1]);
  /* Report the element that was actually tested, not the last one. */
  fail_if(v->data[123] == FS_RID_NULL, "FS_RID_NULL should not be there %llx",
          v->data[123]);
  fs_rid_vector_free(v);
}
Пример #2
0
/*
 * Write a human-readable report about an mhash to `out`.
 *
 * mh:        the mhash to report on; a NULL pointer is logged and ignored.
 * out:       stream that receives the report.
 * verbosity: when greater than 0, every occupied entry is listed as
 *            "<entry index> <rid> <val>".
 *
 * The header fields are printed first, then the entries are read
 * sequentially from mh->fd starting just after the header.  Three
 * consistency problems are reported as "ERROR:" lines on `out`: the same
 * model RID in two consecutive occupied entries, a header count that
 * differs from the number of occupied entries found, and any model RID
 * that occurs more than once overall.
 */
void fs_mhash_print(fs_mhash *mh, FILE *out, int verbosity)
{
    if (!mh) {
        fs_error(LOG_CRIT, "tried to print NULL mhash");

        return;
    }
    fs_mhash_entry e;
    fs_rid_vector *models = fs_rid_vector_new(0);
    fs_rid last_model = FS_RID_NULL;
    int entry = 0;
    int count = 0;

    fprintf(out, "mhash %s\n", mh->filename);
    fprintf(out, "  count: %d\n", mh->count);
    fprintf(out, "  size: %d\n", mh->size);
    fprintf(out, "\n");

    /* Without a successful seek the scan below would start at an arbitrary
       offset and report nonsense, so give up instead. */
    if (lseek(mh->fd, sizeof(struct mhash_header), SEEK_SET) == (off_t) -1) {
        fs_error(LOG_ERR, "failed to seek past header of mhash %s",
                 mh->filename);
        fs_rid_vector_free(models);

        return;
    }
    while (read(mh->fd, &e, sizeof(e)) == sizeof(e)) {
        /* Entries whose val is zero are treated as unused slots. */
        if (e.val) {
            count++;
            if (verbosity > 0) {
                fprintf(out, "%8d %016llx %8d\n", entry, e.rid, e.val);
            }
            fs_rid_vector_append(models, e.rid);
            if (e.rid == last_model) {
                fprintf(out, "ERROR: %s model %016llx appears multiple times\n",
                        mh->filename, e.rid);
            }
            last_model = e.rid;
        }
        entry++;
    }

    if (mh->count != count) {
        fprintf(out, "ERROR: %s header count %d != scanned count %d\n",
                mh->filename, mh->count, count);
    }

    /* Sorting then de-duplicating shrinks the vector if and only if some
       model RID was collected more than once. */
    int oldlength = models->length;
    fs_rid_vector_sort(models);
    fs_rid_vector_uniq(models, 0);
    if (models->length != oldlength) {
        fprintf(out, "ERROR: %s some models appear > 1 time\n",
                mh->filename);
    }

    /* The vector is purely local scratch space; release it. */
    fs_rid_vector_free(models);
}
Пример #3
0
Файл: dump.c Проект: rafl/4store
/*
 * Return the lexical form of a URI resource.
 *
 * The shared resource cache is consulted first; on a miss the RID is
 * resolved individually through fsp_resolve().  A freshly resolved value is
 * handed straight back and is not stored in the cache.
 */
xmlChar *get_uri(fsp_link *link, fs_rid rid)
{
  const fs_rid slot = rid & CACHE_MASK;

  if (cache[slot].rid != rid) {
    /* Cache miss: ask the backend for just this one RID. */
    fs_resource fetched;
    fs_rid_vector single = { .length = 1, .size = 1, .data = &rid };

    fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &single, &fetched);

    return (xmlChar *) fetched.lex;
  }

  return (xmlChar *) cache[slot].lex;
}

/*
 * Return the lexical form of an attribute resource (used by the caller for
 * language tags), going through the dedicated attribute cache.
 *
 * On a miss the RID is resolved with fsp_resolve() and the result replaces
 * whatever occupied that cache slot.  The returned string stays owned by
 * the cache: it remains valid only until the slot is overwritten by a later
 * call, so callers must use it before calling get_attr() again.
 */
xmlChar *get_attr(fsp_link *link, fs_rid rid)
{
  /* The lookup and the store must use the same mask; indexing attr_cache
     with the main cache's mask misses stored entries and can read outside
     the array when the two caches differ in size. */
  const fs_rid slot = rid & ATTR_CACHE_MASK;

  if (attr_cache[slot].rid == rid) {
    return (xmlChar *) attr_cache[slot].lex;
  }

  fs_rid_vector onerid = { .length = 1, .size = 1, .data = &rid };
  /* Zeroed so that a resolve that writes nothing cannot put garbage
     pointers into the cache. */
  fs_resource resource = {0};

  fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &onerid, &resource);

  /* Drop the string of the entry being evicted, as resolve_triples() does
     for the main cache. */
  free(attr_cache[slot].lex);
  memcpy(&attr_cache[slot], &resource, sizeof(fs_resource));

  return (xmlChar *) resource.lex;
}

/*
 * Return the lexical form of a literal and store its attribute RID in *attr.
 *
 * The shared resource cache is consulted first; on a miss the RID is
 * resolved individually through fsp_resolve() and the result is returned
 * without being cached.
 */
xmlChar *get_literal(fsp_link *link, fs_rid rid, fs_rid *attr)
{
  const fs_rid slot = rid & CACHE_MASK;

  if (cache[slot].rid != rid) {
    /* Cache miss: ask the backend for just this one RID. */
    fs_resource fetched;
    fs_rid_vector single = { .length = 1, .size = 1, .data = &rid };

    fsp_resolve(link, FS_RID_SEGMENT(rid, segments), &single, &fetched);
    *attr = fetched.attr;

    return (xmlChar *) fetched.lex;
  }

  *attr = cache[slot].attr;

  return (xmlChar *) cache[slot].lex;
}

/*
 * Resolve, in one batch, every resource referenced by the first three
 * columns of `rids` that is not already in the resource cache.
 *
 * Blank nodes are skipped.  Each remaining RID is queued on the per-segment
 * vector chosen by FS_RID_SEGMENT() and its cache slot is tagged with the
 * RID straight away, so a repeat of the same RID later in the batch is not
 * queued again while it still owns the slot.  After fsp_resolve_all()
 * returns, each resolved resource replaces the contents of its cache slot
 * and the string previously held there is freed.
 */
void resolve_triples(fsp_link *link, fs_rid_vector **rids)
{
  const int rows = rids[0]->length;
  fs_rid_vector *pending[segments];
  fs_segment seg;

  for (seg = 0; seg < segments; ++seg) {
    pending[seg] = fs_rid_vector_new(0);
  }

  /* Collect the RIDs that still need resolving, column by column. */
  for (int col = 0; col < 3; ++col) {
    for (int row = 0; row < rows; ++row) {
      const fs_rid rid = rids[col]->data[row];

      if (!FS_IS_BNODE(rid) && cache[rid & CACHE_MASK].rid != rid) {
        fs_rid_vector_append(pending[FS_RID_SEGMENT(rid, segments)], rid);
        /* Claim the slot now; its contents are filled in further down. */
        cache[rid & CACHE_MASK].rid = rid;
      }
    }
  }

  /* One output array per segment, sized to the number of queued RIDs. */
  int wanted[segments];
  fs_resource *resolved[segments];
  for (seg = 0; seg < segments; ++seg) {
    wanted[seg] = pending[seg]->length;
    resolved[seg] = calloc(wanted[seg], sizeof(fs_resource));
  }

  fsp_resolve_all(link, pending, resolved);

  /* Move the results into the cache and release the scratch storage. */
  for (seg = 0; seg < segments; ++seg) {
    for (int k = 0; k < wanted[seg]; ++k) {
      fs_resource *fresh = &resolved[seg][k];

      free(cache[fresh->rid & CACHE_MASK].lex);
      memcpy(&cache[fresh->rid & CACHE_MASK], fresh, sizeof(fs_resource));
    }

    fs_rid_vector_free(pending[seg]);
    free(resolved[seg]);
  }
}

/*
 * Write every triple of one model (graph) to the XML writer.
 *
 * Triples are fetched in batches of at most QUAD_LIMIT via
 * fsp_bind_first_all() / fsp_bind_next_all().  For each batch the referenced
 * resources are resolved into the cache, then one <triple> element is
 * written per row with three children:
 *   - <id> for blank nodes,
 *   - <uri> for URIs,
 *   - <plainLiteral> (optionally with xml:lang) or <typedLiteral> (with a
 *     datatype attribute) for literals.
 * A literal whose attribute is neither empty, a URI nor a literal produces
 * no child element.
 *
 * The time spent binding, resolving and writing is added to the
 * time_bind_first, time_bind_next, time_resolving and time_write_out
 * accumulators.
 */
void dump_model(fsp_link *link, fs_rid model, xmlTextWriterPtr xml)
{
  fs_rid_vector none = { .length = 0, .size = 0, .data = 0 };
  fs_rid_vector one = { .length = 1, .size = 1, .data = &model };

  /* Start from NULL so the loop is skipped if the bind call returns
     without storing a result. */
  fs_rid_vector **results = NULL;

  double then; /* for time keeping */

  then = fs_time();
  fsp_bind_first_all(link, BIND_SPO, &one, &none, &none, &none, &results, QUAD_LIMIT);
  time_bind_first += (fs_time() - then);

  while (results != NULL) {

    long length = results[0]->length;

    if (length > 0) {
      then = fs_time();
      resolve_triples(link, results);
      time_resolving += (fs_time() - then);

      then = fs_time();
      for (int k = 0; k < length; ++k) {
        xmlTextWriterStartElement(xml, (xmlChar *) "triple");

        for (int r = 0; r < 3; ++r) {
          fs_rid rid = results[r]->data[k];
          if (FS_IS_BNODE(rid)) {
            unsigned long long node = FS_BNODE_NUM(rid);
            xmlTextWriterWriteFormatElement(xml, (xmlChar *) "id", "%llu", node);
          } else if (FS_IS_URI(rid)) {
            xmlChar *uri = get_uri(link, rid);
            xmlTextWriterWriteElement(xml, (xmlChar *) "uri", uri);
          } else if (FS_IS_LITERAL(rid)) {
            fs_rid attr;
            xmlChar *lex = get_literal(link, rid, &attr);
            if (attr == fs_c.empty) {
              xmlTextWriterWriteElement(xml, (xmlChar *) "plainLiteral", lex);
            } else if (FS_IS_URI(attr)) {
              /* A URI attribute is the literal's datatype. */
              xmlChar *type = get_uri(link, attr);
              xmlTextWriterStartElement(xml, (xmlChar *) "typedLiteral");
              xmlTextWriterWriteString(xml, (xmlChar *) lex);
              xmlTextWriterWriteAttribute(xml, (xmlChar *) "datatype", type);
              xmlTextWriterEndElement(xml);
            } else if (FS_IS_LITERAL(attr)) {
              /* A literal attribute is the literal's language tag. */
              xmlChar *lang = get_attr(link, attr);
              xmlTextWriterStartElement(xml, (xmlChar *) "plainLiteral");
              xmlTextWriterWriteAttribute(xml, (xmlChar *) "xml:lang", lang);
              xmlTextWriterWriteString(xml, (xmlChar *) lex);
              xmlTextWriterEndElement(xml);
            }
          }
        }
        xmlTextWriterEndElement(xml);
        xmlTextWriterWriteString(xml, (xmlChar *) "\n");

      }
      time_write_out += (fs_time() - then);
    }

    /* Release the batch on every path, including the empty batch that
       ends the iteration (previously leaked by an early break). */
    fs_rid_vector_free(results[0]);
    fs_rid_vector_free(results[1]);
    fs_rid_vector_free(results[2]);
    free(results);

    if (length == 0) break;

    then = fs_time();
    fsp_bind_next_all(link, BIND_SPO, &results, QUAD_LIMIT);
    time_bind_next += (fs_time() - then);
  }

  fsp_bind_done_all(link);
}

/*
 * Write one <graph> element per distinct model in the store.
 *
 * The model list is obtained with fsp_bind_all(), sorted, and de-duplicated
 * with FS_RID_NULL entries removed.  Each graph gets a <uri> child when the
 * model RID is a URI (otherwise a warning is logged), followed by the
 * model's triples from dump_model().  After each model a progress line with
 * the accumulated timing counters is printed to stdout.
 */
void dump_trix(fsp_link *link, xmlTextWriterPtr xml)
{
  /* Start from NULL so a bind call that stores nothing is detectable. */
  fs_rid_vector **models = NULL;
  fs_rid_vector none = { .length = 0, .size = 0, .data = 0 };

  fsp_bind_all(link, FS_BIND_DISTINCT | FS_BIND_MODEL | FS_BIND_BY_SUBJECT, &none, &none, &none, &none, &models);

  if (!models) {
    fs_error(LOG_ERR, "couldn't fetch the list of models");
    return;
  }

  fs_rid_vector_sort(models[0]);
  fs_rid_vector_uniq(models[0], 1);

  long length = models[0]->length;

  for (int k = 0; k < length; ++k) {
    fs_rid model = models[0]->data[k];
    xmlTextWriterStartElement(xml, (xmlChar *) "graph");
    if (FS_IS_URI(model)) {
      /* Resolve only when the result is actually written out. */
      xmlChar *model_uri = get_uri(link, model);
      xmlTextWriterWriteElement(xml, (xmlChar *) "uri", model_uri);
    } else {
      fs_error(LOG_WARNING, "model %lld is not a URI", model);
    }

    dump_model(link, model, xml);
    xmlTextWriterEndElement(xml);
    xmlTextWriterWriteString(xml, (xmlChar *) "\n");
    printf("%5d/%ld: %4.5f %4.5f %4.5f %4.5f\n", k + 1, length, time_resolving, time_bind_first, time_bind_next, time_write_out);
  }

  /* Only the model column was requested, so there is a single vector. */
  fs_rid_vector_free(models[0]);
  free(models);
}

/*
 * Dump the whole store as a TriX document into `filename`.
 *
 * The process exits with status 4 if the output writer cannot be created.
 */
void dump_file(fsp_link *link, char *filename)
{
  xmlTextWriterPtr writer = xmlNewTextWriterFilename(filename, TRUE);

  if (writer == NULL) {
    fs_error(LOG_ERR, "Couldn't write output file, giving up");
    exit(4);
  }

  /* Document prologue and root element. */
  xmlTextWriterStartDocument(writer, NULL, NULL, NULL);
  xmlTextWriterStartElement(writer, (xmlChar *) "TriX");

  dump_trix(link, writer);

  /* Ending the document also closes the still-open TriX element. */
  xmlTextWriterEndDocument(writer);
  xmlFreeTextWriter(writer);
}

/*
 * Entry point: dump the knowledge base named by argv[1] to the output
 * named by argv[2].
 *
 * Exit status: 1 on wrong usage, 2 if the store cannot be reached, 4 if the
 * output cannot be opened (raised inside dump_file()), 0 otherwise.
 */
int main(int argc, char *argv[])
{
  /* Called before the argument count is checked because it is handed
     argc/argv by reference and may change them. */
  char *password = fsp_argv_password(&argc, argv);

  if (argc != 3) {
    fprintf(stderr, "%s revision %s\n", argv[0], FS_FRONTEND_VER);
    fprintf(stderr, "Usage: %s <kbname> <uri>\n", argv[0]);
    exit(1);
  }

  char *kb_name = argv[1];
  char *target = argv[2];

  fsp_link *link = fsp_open_link(kb_name, password, FS_OPEN_HINT_RO);

  if (link == NULL) {
    fs_error (LOG_ERR, "couldn't connect to “%s”", kb_name);
    exit(2);
  }

  fs_hash_init(fsp_hash_type(link));
  /* The segment count is read by the resolve helpers. */
  segments = fsp_link_segments(link);

  dump_file(link, target);

  fsp_close_link(link);

  return 0;
}