Beispiel #1
0
/**
 * Parse RDF text read from a stream into a newly allocated graph.
 *
 * The stream is consumed in chunks of at most BUFFER_SIZE - 1 characters and
 * every chunk is handed to the raptor parser held in mpParser. The new graph
 * is registered as user data of the statement, namespace and id handlers.
 *
 * @param stream input stream providing the RDF text
 * @return pointer to the graph allocated here with new, or NULL if raptor
 *         reported an error for one of the chunks
 */
CRDFGraph * CRDFParser::parse(std::istream & stream)
{
  bool success = true;
  bool done = false;

  stream.imbue(std::locale::classic());
  stream.precision(16);

  // Create the new graph
  CRDFGraph * pGraph = new CRDFGraph;

  unsigned C_INT32 BUFFER_SIZE = 0xfffe;
  char * pBuffer = new char[BUFFER_SIZE + 1];

  // Extraction stops at a NUL character (delimiter 0), so the chunk is a
  // proper C string and strlen() below yields the number of characters read.
  stream.get(pBuffer, BUFFER_SIZE, 0);

  if (stream.gcount() != 0)
    {
      raptor_uri * pURI = raptor_new_uri((unsigned char *) "#");

      if (raptor_start_parse(mpParser, pURI))
        fatalError();

      raptor_set_statement_handler(mpParser, pGraph, &CRDFParser::TripleHandler);
      raptor_set_namespace_handler(mpParser, pGraph, &CRDFParser::NameSpaceHandler);
      raptor_set_generate_id_handler(mpParser, pGraph, &CRDFParser::GenerateIdHandler);

      while (!done)
        {
          // Reaching the end of the stream marks the current chunk as the last one.
          if (stream.eof()) done = true;

          if (stream.fail() && !done)
            fatalError();

          if (raptor_parse_chunk(mpParser,
                                 (unsigned char *) pBuffer,
                                 strlen(pBuffer),
                                 done ? 1 : 0))
            {
              done = true;
              success = false;
            }

          stream.get(pBuffer, BUFFER_SIZE, 0);
        }

      // The base URI was created in this scope and nothing else releases it;
      // free it now that parsing has finished.
      if (pURI != NULL)
        raptor_free_uri(pURI);
    }

  delete [] pBuffer;

  if (!success)
    {
      pdelete(pGraph);
      return NULL;
    }

  if (pGraph)
    pGraph->guessGraphRoot();

  return pGraph;
}
Beispiel #2
0
/* ..._start and ..._finish share an int * count parameter
 * the same variable should be passed by reference both times
 *
 * Prepares the file-global parse_data for a streamed import into the model
 * model_uri, creates a raptor parser for the given mimetype and starts a
 * chunked parse with the model URI as base URI.
 *
 * Returns 0 on success and 1 on failure (temporary file, parser creation or
 * parser start failed).
 *
 * NOTE(review): the failure paths return without releasing what was acquired
 * earlier in this call - confirm whether the caller cleans up. */
int fs_import_stream_start(fsp_link *link, const char *model_uri, const char *mimetype, int has_o_index, int *count)
{
    if (inited == 0) {
        memset(&parse_data, 0, sizeof(parse_data));
        inited = 1;
    }

    parse_data.link = link;
    parse_data.segments = fsp_link_segments(link);
    parse_data.ext_count = count;

    /* NOTE(review): these buffers are allocated on every call, not only on
     * the first one - confirm that the matching finish call releases them */
    for (int i=0; i<parse_data.segments; i++) {
        for (int j=0; j<RES_BUF_SIZE; j++) {
            lex_tmp[i][j] = malloc(RES_BUF_SIZE);
        }
    }

    memset(nodecache, 0, sizeof(nodecache));

    parse_data.quad_fn = g_strdup(FS_TMP_PATH "/importXXXXXX");
    parse_data.quad_fd = mkstemp(parse_data.quad_fn);
    if (parse_data.quad_fd < 0) {
        fs_error(LOG_ERR, "Cannot create tmp file “%s”", parse_data.quad_fn);
        return 1;
    }

    parse_data.muri = raptor_new_uri((unsigned char *) model_uri);

    parse_data.model = g_strdup(model_uri);
    parse_data.model_hash = fs_hash_uri(model_uri);
    parse_data.count_trip = 0;
    parse_data.count_err = 0;
    parse_data.last_count = 0;
    parse_data.has_o_index = has_o_index;

    /* store the model uri (dryrun is whatever parse_data already holds; it
     * is not set in this function) */
    buffer_res(link, parse_data.segments, parse_data.model_hash, parse_data.model, FS_RID_NULL, parse_data.dryrun);

    parse_data.parser = raptor_new_parser_for_content(NULL, mimetype, NULL, 0, (unsigned char *) parse_data.model);

    if (!parse_data.parser) {
        fs_error(LOG_ERR, "failed to create RDF parser");
        return 1;
    }

    /* use us as a vector for an indirect attack? no thanks
     * a non-zero value is what makes raptor deny network requests */
    raptor_set_feature(parse_data.parser, RAPTOR_FEATURE_NO_NET, 1);

    raptor_set_fatal_error_handler(parse_data.parser, link, fatal_rdf_parser_error);
    raptor_set_error_handler(parse_data.parser, link, rdf_parser_error);

    raptor_set_statement_handler(parse_data.parser, &parse_data, store_stmt);
    raptor_set_graph_handler(parse_data.parser, &parse_data, graph_handler);

    if (raptor_start_parse(parse_data.parser, parse_data.muri)) {
        fs_error(LOG_ERR, "failed to start RDF parser for “%s”", model_uri);
        return 1;
    }

    fs_hash_freshen(); /* blank nodes are unique per file */

    return 0;
}
Beispiel #3
0
/* Create the global raptor parser theParser for the syntax named by prs and
 * register this file's error, message and statement callbacks on it.
 * The name is first copied into Seqbyte_print_buf, which is what is handed
 * to raptor. If no parser could be created, theParser is left NULL and no
 * callbacks are registered. */
void raptorNewParser(string prs)
{
  string_copyto(Seqbyte_print_buf,Seqbyte_print_buf_length,prs);
  theParser = raptor_new_parser(Seqbyte_print_buf);
  /* raptor_new_parser returns NULL on failure; the setters below must not
     be called with a NULL parser */
  if (!theParser)
    return;
//  raptor_set_feature(theParser,RAPTOR_FEATURE_SCANNING,1);
  raptor_set_fatal_error_handler(theParser,NULL,raptorErrorHandler);
  raptor_set_error_handler(theParser,NULL,raptorMessageHandler); 
  raptor_set_warning_handler(theParser,NULL,raptorMessageHandler); 
  raptor_set_statement_handler(theParser,NULL,raptorStatementHandler);
 }
Beispiel #4
0
/* Parse the RDF/XML content at the URI given as the only command line
 * argument, passing every statement to handle_statements together with a
 * my_data record (output stream, statement counter, limit of 5, parser and
 * stopped flag). Prints the parser status and whether parsing was stopped.
 *
 * Exits with 1 on wrong usage or when a resource cannot be created;
 * otherwise returns 0 regardless of the parser status. */
int
main (int argc, char *argv[])
{
    raptor_parser* rdf_parser;
    raptor_uri* uri;
    my_data* me;
    const char *program;
    int rc;

    program=argv[0];

    if(argc != 2) {
        fprintf(stderr, "%s: USAGE [RDF-XML content URI]\n", program);
        exit(1);
    }

    raptor_init();

    me=(my_data*)malloc(sizeof(my_data));
    if(!me) {
        fprintf(stderr, "%s: Out of memory\n", program);
        exit(1);
    }

    me->stream=stderr;
    me->count=0;
    me->max=5;

    uri=raptor_new_uri((const unsigned char*)argv[1]);
    if(!uri) {
        fprintf(stderr, "%s: Failed to create URI for %s\n", program, argv[1]);
        free(me);
        raptor_finish();
        return 1;
    }

    rdf_parser=raptor_new_parser("rdfxml");
    if(!rdf_parser) {
        fprintf(stderr, "%s: Failed to create raptor parser type rdfxml\n",
                program);
        raptor_free_uri(uri);
        free(me);
        raptor_finish();
        return 1;
    }

    me->parser=rdf_parser;

    raptor_set_statement_handler(rdf_parser, me, handle_statements);

    me->stopped=0;
    rc=raptor_parse_uri(rdf_parser, uri, NULL);

    fprintf(stderr, "%s: Parser returned status %d, stopped? %s\n", program, rc,
            (me->stopped ? "yes" : "no"));

    free(me);

    raptor_free_parser(rdf_parser);

    raptor_free_uri(uri);

    raptor_finish();

    return 0;
}
Beispiel #5
0
/* Command line front end: parse RDF content from a file, a URI or standard
 * input ("-") and pass every statement to print_statements.
 *
 * Usage: PROGRAM [OPTIONS] <source URI> [base URI]
 *
 * Exit status: 1 on usage errors, resource failures, a failed parse, or when
 * errors were counted and not ignored; 2 when only warnings were counted and
 * not ignored; 0 otherwise. */
int
main(int argc, char *argv[]) 
{
  raptor_parser* rdf_parser=NULL;
  unsigned char *uri_string=NULL;
  int free_uri_string=0;
  unsigned char *base_uri_string=NULL;
  int rc;
  int scanning=0;
  const char *syntax_name="rdfxml";
  int strict_mode=0;
  int usage=0;
  int help=0;
  raptor_uri *base_uri;
  raptor_uri *uri;
  char *p;
  char *filename=NULL;

  /* Reduce argv[0] to its last path component for use in messages */
  program=argv[0];
  if((p=strrchr(program, '/')))
    program=p+1;
  else if((p=strrchr(program, '\\')))
    program=p+1;
  argv[0]=program;

  raptor_init();
  
  while (!usage && !help)
  {
    int c;
#ifdef HAVE_GETOPT_LONG
    int option_index = 0;

    c = getopt_long (argc, argv, GETOPT_STRING, long_options, &option_index);
#else
    c = getopt (argc, argv, GETOPT_STRING);
#endif
    if (c == -1)
      break;

    switch (c) {
      case 0:
      case '?': /* getopt() - unknown option */
        usage=1;
        break;
        
      case 'a':
        break;

      case 'c':
        count=1;
        break;

      case 'h':
        help=1;
        break;

      case 'n':
        syntax_name="ntriples";
        break;

      case 's':
        scanning=1;
        break;

      case 'q':
        quiet=1;
        break;

      case 'r':
        replace_newlines=1;
        break;

      case 'm':
        if(optarg) {
          if(!strcmp(optarg, "strict"))
            strict_mode=1;
          else if (!strcmp(optarg, "lax"))
            strict_mode=0;
          else {
            fprintf(stderr, "%s: invalid argument `%s' for `" HELP_ARG(m, mode) "'\n",
                    program, optarg);
            fprintf(stderr, "Valid arguments are:\n  - `lax'\n  - `strict'\n");
            usage=1;
          }
        }
        break;

      case 'o':
        if(optarg) {
          if(!strcmp(optarg, "simple"))
            output_format=OUTPUT_FORMAT_SIMPLE;
          else if (!strcmp(optarg, "ntriples"))
            output_format=OUTPUT_FORMAT_NTRIPLES;
          else {
            fprintf(stderr, "%s: invalid argument `%s' for `" HELP_ARG(o, output) "'\n",
                    program, optarg);
            fprintf(stderr, "Valid arguments are:\n  `simple'   for a simple format (default)\n  `ntriples' for N-Triples\n");
            usage=1;
          }
        }
        break;

      case 'i':
        if(optarg) {
          if(raptor_syntax_name_check(optarg))
            syntax_name=optarg;
          else {
            int i;
            
            fprintf(stderr, "%s: invalid argument `%s' for `" HELP_ARG(i, input) "'\n",
                    program, optarg);
            fprintf(stderr, "Valid arguments are:\n");
            for(i=0; 1; i++) {
              const char *help_name;
              const char *help_label;
              if(raptor_syntaxes_enumerate(i, &help_name, &help_label, NULL, NULL))
                break;
              /* keep the whole error message on stderr */
              fprintf(stderr, "  %-12s for %s\n", help_name, help_label);
            }
            usage=1;
            break;
            
          }
        }
        break;

      case 'w':
        ignore_warnings=1;
        break;
        
      case 'e':
        ignore_errors=1;
        break;

      case 'v':
        fputs(raptor_version_string, stdout);
        fputc('\n', stdout);
        exit(0);
    }
    
  }

  /* Exactly one or two non-option arguments are accepted */
  if(optind != argc-1 && optind != argc-2 && !help && !usage) {
    usage=2; /* Title and usage */
  }

  
  if(usage) {
    if(usage>1) {
      fprintf(stderr, title_format_string, raptor_version_string);
      fputs(raptor_short_copyright_string, stderr);
      fputc('\n', stderr);
    }
    fprintf(stderr, "Try `%s " HELP_ARG(h, help) "' for more information.\n",
                    program);
    exit(1);
  }

  if(help) {
    int i;
    
    printf("Usage: %s [OPTIONS] <source URI> [base URI]\n", program);
    printf(title_format_string, raptor_version_string);
    puts(raptor_short_copyright_string);
    puts("Parse RDF content at the source URI into RDF triples.");
    puts("\nMain options:");
    puts(HELP_TEXT(h, "help            ", "Print this help, then exit"));
    puts(HELP_TEXT(i, "input FORMAT    ", "Set input format to one of:"));
    for(i=0; 1; i++) {
      const char *help_name;
      const char *help_label;
      if(raptor_syntaxes_enumerate(i, &help_name, &help_label, NULL, NULL))
        break;
      printf("    %-12s            %s", help_name, help_label);
      if(!i)
        puts(" (default)");
      else
        putchar('\n');
    }
    puts(HELP_TEXT(o, "output FORMAT   ", "Set output format to one of:"));
    puts("    'simple'                A simple format (default)\n    'ntriples'              N-Triples");
    puts(HELP_TEXT(m, "mode MODE       ", "Set parser mode - 'lax' (default) or 'strict'"));
    puts("\nAdditional options:");
    puts(HELP_TEXT(c, "count           ", "Count triples - no output"));
    puts(HELP_TEXT(e, "ignore-errors   ", "Ignore error messages"));
    puts(HELP_TEXT(q, "quiet           ", "No extra information messages"));
    puts(HELP_TEXT(r, "replace-newlines", "Replace newlines with spaces in literals"));
    puts(HELP_TEXT(s, "scan            ", "Scan for <rdf:RDF> element in source"));
    puts(HELP_TEXT(w, "ignore-warnings ", "Ignore warning messages"));
    puts(HELP_TEXT(v, "version         ", "Print the Raptor version"));
    puts("\nReport bugs to <*****@*****.**>.");
    puts("Raptor home page: http://www.redland.opensource.ac.uk/raptor/");
    exit(0);
  }


  if(optind == argc-1)
    uri_string=(unsigned char*)argv[optind];
  else {
    uri_string=(unsigned char*)argv[optind++];
    base_uri_string=(unsigned char*)argv[optind];
  }

  /* If uri_string is "path-to-file", turn it into a file: URI */
  if(!strcmp((const char*)uri_string, "-")) {
    if(!base_uri_string) {
      fprintf(stderr, "%s: A Base URI is required when reading from standard input.\n",
              program);
      return(1);
    }
    uri_string=NULL;
  } else if(!access((const char*)uri_string, R_OK)) {
    filename=(char*)uri_string;
    uri_string=raptor_uri_filename_to_uri_string(filename);
    free_uri_string=1;
  }

  if(uri_string) {
    uri=raptor_new_uri(uri_string);
    if(!uri) {
      fprintf(stderr, "%s: Failed to create URI for %s\n",
              program, uri_string);
      return(1);
    }
  } else
    uri=NULL; /* stdin */


  /* Without an explicit base URI the source URI doubles as base URI; uri is
   * never NULL here because standard input requires a base URI (see above) */
  if(!base_uri_string) {
    base_uri=raptor_uri_copy(uri);
  } else {
    base_uri=raptor_new_uri(base_uri_string);
    if(!base_uri) {
      fprintf(stderr, "%s: Failed to create URI for %s\n",
              program, base_uri_string);
      return(1);
    }
  }

  rdf_parser=raptor_new_parser(syntax_name);
  if(!rdf_parser) {
    fprintf(stderr, "%s: Failed to create raptor parser type %s\n", program,
            syntax_name);
    return(1);
  }
  
  raptor_set_error_handler(rdf_parser, rdf_parser, rdfdump_error_handler);
  raptor_set_warning_handler(rdf_parser, rdf_parser, rdfdump_warning_handler);
  
  raptor_set_parser_strict(rdf_parser, strict_mode);
  
  if(scanning)
    raptor_set_feature(rdf_parser, RAPTOR_FEATURE_SCANNING, 1);

  if(!quiet) {
    if (filename) {
      if(base_uri_string)
        fprintf(stdout, "%s: Parsing file %s with base URI %s\n", program,
                filename, base_uri_string);
      else
        fprintf(stdout, "%s: Parsing file %s\n", program, filename);
    } else if(!uri_string) {
      /* Standard input: there is no URI string to print (passing NULL to %s
       * is undefined); a base URI is always present in this case */
      fprintf(stdout, "%s: Parsing standard input with base URI %s\n", program,
              base_uri_string);
    } else {
      if(base_uri_string)
        fprintf(stdout, "%s: Parsing URI %s with base URI %s\n", program,
                uri_string, base_uri_string);
      else
        fprintf(stdout, "%s: Parsing URI %s\n", program, uri_string);
    }
  }
  
  raptor_set_statement_handler(rdf_parser, NULL, print_statements);


  /* PARSE the URI as RDF/XML */
  rc=0;
  if(!uri || filename) {
    if(raptor_parse_file(rdf_parser, uri, base_uri)) {
      /* filename is NULL when reading standard input */
      fprintf(stderr, "%s: Failed to parse file %s %s content\n", program, 
              (filename ? filename : "<stdin>"), syntax_name);
      rc=1;
    }
  } else {
    if(raptor_parse_uri(rdf_parser, uri, base_uri)) {
      fprintf(stderr, "%s: Failed to parse URI %s %s content\n", program, 
              uri_string, syntax_name);
      rc=1;
    }
  }

  raptor_free_parser(rdf_parser);

  if(!quiet)
    fprintf(stdout, "%s: Parsing returned %d statements\n", program,
            statement_count);

  raptor_free_uri(base_uri);
  if(uri)
    raptor_free_uri(uri);
  /* NOTE(review): the string came from raptor_uri_filename_to_uri_string();
   * confirm whether this raptor version expects raptor_free_memory() here */
  if(free_uri_string)
    free(uri_string);

  raptor_finish();

  if(error_count && !ignore_errors)
    return 1;

  if(warning_count && !ignore_warnings)
    return 2;

  return(rc);
}
Beispiel #6
0
/**
 * librdf_new_sql_config:
 * @world: librdf_world
 * @storage_name: SQL storage name
 * @layout: SQL schema variant (or NULL for the default layout)
 * @config_dir: directory for configuration files
 * @predicate_uri_strings: NULL-terminated array of configuration predicate
 *   URIs to look for
 * 
 * Constructor - Make a new SQL configuration for a layout from a file
 *
 * Uses SQL storage name @storage_name and with database schema
 * @layout to give a configuration that will contain an array of
 * string values in the #librdf_sql_config field values array.
 *
 * The configuration is read from the Turtle file
 * @config_dir/@storage_name-@layout.ttl, or @config_dir/@storage_name.ttl
 * when @layout is NULL.  Every predicate in @predicate_uri_strings must end
 * up with a value, otherwise the construction fails.
 * 
 * Return value: configuration or NULL on failure
 **/
librdf_sql_config*
librdf_new_sql_config(librdf_world* world,
                      const char* storage_name,
                      const char* layout,
                      const char* config_dir,
                      const char** predicate_uri_strings)
{
  raptor_parser* rdf_parser=NULL;
  unsigned char *uri_string=NULL;
  raptor_uri *base_uri=NULL;
  raptor_uri *uri=NULL;
  librdf_sql_config* config;
  size_t len;
  int parsed=0;
  int i;
  
  librdf_world_open(world);

  if(!storage_name || !config_dir || !predicate_uri_strings)
    return NULL;

  config=(librdf_sql_config*)LIBRDF_MALLOC(librdf_sql_config,
                                           sizeof(librdf_sql_config));
  if(!config)
    return NULL;

  /* Give the fields used below defined values before anything can fail, so
   * that librdf_free_sql_config() never sees uninitialised pointers */
  config->filename=NULL;
  config->values=NULL;
  config->predicates_count=0;
  config->predicate_uri_strings=predicate_uri_strings;

  /* dir + '/' + name + ".ttl" + NUL, plus '-' + layout when given */
  len=strlen(config_dir) + 1 + strlen(storage_name) + 4 + 1;
  if(layout)
    len+= strlen(layout) + 1;
  config->filename=(char*)LIBRDF_MALLOC(cstring, len);
  if(!config->filename) {
    librdf_free_sql_config(config);
    return NULL;
  }
  if(layout)
    sprintf(config->filename, "%s/%s-%s.ttl", config_dir, storage_name, layout);
  else
    sprintf(config->filename, "%s/%s.ttl", config_dir, storage_name);

  /* Count the predicates; the array is terminated by a NULL entry */
  for(i=0; config->predicate_uri_strings[i]; i++)
    ;
  config->predicates_count=i;
  config->values=(char**)LIBRDF_CALLOC(cstring, sizeof(char*), 
                                       config->predicates_count);
  /* A zero-sized allocation may legitimately yield NULL */
  if(!config->values && config->predicates_count) {
    librdf_free_sql_config(config);
    return NULL;
  }
  
  LIBRDF_DEBUG4("Attempting to open %s layout %s storage config file %s\n", 
                storage_name, (layout ? layout: "(default)"), config->filename);
  
  if(access((const char*)config->filename, R_OK)) {
    librdf_log(world, 0, LIBRDF_LOG_ERROR, LIBRDF_FROM_STORAGE, NULL,
               "Failed to open configuration file %s for storage %s layout %s - %s",
               config->filename, storage_name, (layout ? layout: "(default)"),
               strerror(errno));
    librdf_free_sql_config(config);
    return NULL;
  }
  
  /* Build everything the parse needs; each step depends on the previous */
  uri_string=raptor_uri_filename_to_uri_string(config->filename);
  if(uri_string)
    uri=raptor_new_uri(uri_string);
  if(uri)
    base_uri=raptor_uri_copy(uri);
  if(base_uri)
    rdf_parser=raptor_new_parser("turtle");

  if(rdf_parser) {
    raptor_set_statement_handler(rdf_parser, config,
                                 librdf_sql_config_store_triple);
    /* The parse status is not used: incomplete content is detected by the
     * check for missing values below */
    raptor_parse_file(rdf_parser, uri, base_uri);
    raptor_free_parser(rdf_parser);
    parsed=1;
  }
  
  if(base_uri)
    raptor_free_uri(base_uri);
  if(uri_string)
    raptor_free_memory(uri_string);
  if(uri)
    raptor_free_uri(uri);

  if(!parsed) {
    librdf_log(world, 0, LIBRDF_LOG_ERROR, LIBRDF_FROM_STORAGE, NULL,
               "Failed to set up parser for configuration file %s for storage %s",
               config->filename, storage_name);
    librdf_free_sql_config(config);
    return NULL;
  }

  /* Check all values are given */
  for(i=0; i < config->predicates_count; i++) {
    if(!config->values[i]) {
      librdf_log(world, 0, LIBRDF_LOG_ERROR, LIBRDF_FROM_STORAGE, NULL,
                 "Configuration %s missing for storage %s",
                 config->predicate_uri_strings[i], storage_name);
      librdf_free_sql_config(config);
      return NULL;
    }
  }
  
  return config;
}
Beispiel #7
0
/* ------------------------------------------------------------
   RDF_parseFile

   Parses a file of RDF.
  
   Parameters:
     file_name - the name of the file to parse
     rdf_model - a list of RDF resources to update

   Returns: a pointer to an annotation's list or NULL if an error
     occurs during the parsing.
 ------------------------------------------------------------*/
List *RDF_parseFile (char *file_name, List **rdf_model)
{
  ParseContext   ctx;
  raptor_parser *rdfxml_parser=NULL;
  raptor_uri    *uri = NULL;
  char          *full_file_name;
  char          *tmp, *path;

  ctx.annot_list = &annot_list;
  ctx.rdf_model = rdf_model;

  annot_list = NULL;

  rdfxml_parser = raptor_new_parser ("rdfxml");

  if (!rdfxml_parser) {
     AnnotList_free (annot_list);
     /* do not free rdf_model here; it may not have been empty to start */
     annot_list = NULL;
     return NULL;
  }  

   /* @@ this is what we should do eventually */
 /* if (!IsW3Path (file_name))
     full_file_name = LocalToWWW (file_name);
  else
  */
  /* raptor doesn't grok file URIs under windows. The following is a patch so
   that we can use it */
  full_file_name = (char *)TtaGetMemory (strlen (file_name) + sizeof ("file:"));
  sprintf (full_file_name, "file:%s", file_name);
#ifdef _WX
  /* convert both the plain path and the "file:" name to the locale charset */
  path = (char *)TtaConvertMbsToByte ((unsigned char *)file_name,
	                                 TtaGetLocaleCharset ());
  tmp = (char *)TtaConvertMbsToByte ((unsigned char *)full_file_name,
	                                 TtaGetLocaleCharset ());
  TtaFreeMemory (full_file_name);
  full_file_name = tmp;
#else /* _WX */
  path = file_name;
#endif /* _WX */
   /* remember the base name for anonymous subjects */
  ctx.base_uri = full_file_name;

  raptor_set_statement_handler(rdfxml_parser, (void *) &ctx, triple_handler); 
  tmp = (char *) raptor_uri_filename_to_uri_string ((const char *) path);
  if (tmp)
    uri = raptor_new_uri ((const unsigned char *) tmp);
#ifdef _WX
  TtaFreeMemory (path);
#endif /* _WX */
  /* NOTE(review): tmp was allocated by raptor; confirm that TtaFreeMemory is
     compatible with raptor's allocator */
  if (tmp)
    TtaFreeMemory (tmp);

  /* A NULL uri must not reach raptor_parse_file: raptor would then read
     from standard input instead of the requested file */
  if (!uri || raptor_parse_file (rdfxml_parser, uri, NULL))
    {
      AnnotList_free (annot_list);
      /* do not free rdf_model here; it may not have been empty to start */
      annot_list = NULL;
      TtaFreeMemory (full_file_name);
      raptor_free_parser (rdfxml_parser);
      if (uri)
        raptor_free_uri (uri);
      return NULL;
    }

  TtaFreeMemory (full_file_name);
  raptor_free_parser (rdfxml_parser);
  raptor_free_uri (uri);

  Finish_FindAnnot();

  /* output whatever we parsed */
#ifdef _RDFDEBUG
  AnnotList_print (annot_list);
#endif /* _RDFDEBUG */

  return (annot_list);
}
Beispiel #8
0
/* Import the RDF document at resource_uri into the model model_uri.
 *
 * The first call also performs the one-off setup of the file-global import
 * state (temporary quad file, lexical buffers, node cache, start time).
 *
 * format selects the raptor parser by name; "auto" picks one from the
 * resource URI: ".n3"/".ttl" -> turtle, ".nt" -> ntriples, otherwise rdfxml.
 * verbosity, dryrun and has_o_index are stored in parse_data for the
 * statement handler; count is stored as parse_data.ext_count.
 * msg is not used in this function.
 *
 * Returns 0 on success and non-zero on failure. */
int fs_import(fsp_link *link, const char *model_uri, char *resource_uri,
	      const char *format, int verbosity, int dryrun, int has_o_index,
              FILE *msg, int *count)
{
    raptor_parser *rdf_parser = NULL;
    raptor_uri *ruri = NULL;
    int ret = 0;

    const int segments = fsp_link_segments(link);

    parse_data.ext_count = count;
    if (!inited) {
        /* create the temporary file first: it is the only step that can
         * fail, and nothing else has to be undone if it does */
        parse_data.quad_fn = g_strdup(FS_TMP_PATH "/importXXXXXX");
        parse_data.quad_fd = mkstemp(parse_data.quad_fn);
        if (parse_data.quad_fd < 0) {
            fs_error(LOG_ERR, "Cannot create tmp file “%s”", parse_data.quad_fn);
            g_free(parse_data.quad_fn);
            parse_data.quad_fn = NULL;
            return 1;
        }

        parse_data.link = link;
        parse_data.segments = fsp_link_segments(link);

        for (int i=0; i<parse_data.segments; i++) {
            for (int j=0; j<RES_BUF_SIZE; j++) {
                lex_tmp[i][j] = malloc(RES_BUF_SIZE);
            }
        }

        memset(nodecache, 0, sizeof(nodecache));

        gettimeofday(&then_last, 0);

        /* only mark the state as initialised once setup has succeeded, so a
         * failed first call is retried instead of leaving a bad descriptor */
        inited = 1;
    }

    parse_data.verbosity = verbosity;
    parse_data.model = g_strdup(model_uri);
    parse_data.model_hash = fs_hash_uri(model_uri);
    parse_data.count_trip = 0;
    parse_data.last_count = 0;
    parse_data.dryrun = dryrun;
    parse_data.has_o_index = has_o_index;

    /* store the model uri */
    buffer_res(link, segments, parse_data.model_hash, parse_data.model, FS_RID_NULL, dryrun);

    if (strcmp(format, "auto")) {
       rdf_parser = raptor_new_parser(format);
    } else if (strstr(resource_uri, ".n3") || strstr(resource_uri, ".ttl")) {
        rdf_parser = raptor_new_parser("turtle");
    } else if (strstr(resource_uri, ".nt")) {
        rdf_parser = raptor_new_parser("ntriples");
    } else {
        rdf_parser = raptor_new_parser("rdfxml");
    }
    if (!rdf_parser) {
        fs_error(LOG_ERR, "failed to create RDF parser");
        g_free(parse_data.model);
        parse_data.model = NULL;
        return 1;
    }

    raptor_set_statement_handler(rdf_parser, &parse_data, store_stmt);
    raptor_set_graph_handler(rdf_parser, &parse_data, graph_handler);
    ruri = raptor_new_uri((unsigned char *) resource_uri);
    parse_data.muri = raptor_new_uri((unsigned char *) model_uri);

    if (!ruri || !parse_data.muri) {
        fs_error(LOG_ERR, "failed to create URI for “%s”",
                 ruri ? model_uri : resource_uri);
        ret++;
    } else if (raptor_parse_uri(rdf_parser, ruri, parse_data.muri)) {
        fs_error(LOG_ERR, "failed to parse file “%s”", resource_uri);
        ret++;
    }
    if (verbosity) {
        printf("Pass 1, processed %d triples (%d)\n", total_triples_parsed, parse_data.count_trip);
    }

    raptor_free_parser(rdf_parser);
    if (ruri) {
        raptor_free_uri(ruri);
    }
    if (parse_data.muri) {
        raptor_free_uri(parse_data.muri);
    }
    g_free(parse_data.model);
    fs_hash_freshen(); /* blank nodes are unique per file */

    return ret;
}