void RDFParserSerd::doParse(const char *fileName, const char *baseUri, RDFNotation notation, bool ignoreErrors, RDFCallback *callback) { this->callback = callback; this->numByte = fileUtil::getSize(fileName); // Create Base URI and environment SerdURI base_uri = SERD_URI_NULL; SerdNode base = serd_node_new_file_uri((const uint8_t *)fileName, NULL, &base_uri, false); env = serd_env_new(&base); SerdReader* reader = serd_reader_new( getParserType(notation), this, NULL, (SerdBaseSink)hdtserd_basechanged, (SerdPrefixSink)hdtserd_prefixchanged, (SerdStatementSink)hdtserd_process_triple, (SerdEndSink)hdtserd_end); serd_reader_set_error_sink(reader, hdtserd_error, NULL); const uint8_t* input=serd_uri_to_path((const uint8_t *)fileName); FILE *in_fd = fopen((const char*)input, "r"); // TODO: fadvise sequential if(in_fd==NULL) { throw "Could not open input file for parsing"; } SerdStatus status = serd_reader_read_file_handle(reader, in_fd, (const uint8_t *)fileName); serd_reader_free(reader); fclose(in_fd); serd_env_free(env); serd_node_free(&base); }
/**
   Read the document identified by the file URI `uri` with `reader`.

   The URI is converted to a path with serd_file_uri_parse(), the file is
   opened for reading with serd_fopen(), and its contents are handed to
   serd_reader_read_file_handle().

   @return SERD_ERR_BAD_ARG if no path could be obtained from `uri`,
   SERD_ERR_UNKNOWN if the file could not be opened, otherwise whatever
   serd_reader_read_file_handle() returned.
*/
SERD_API
SerdStatus
serd_reader_read_file(SerdReader* reader, const uint8_t* uri)
{
	SerdStatus     result   = SERD_ERR_BAD_ARG;
	uint8_t* const filename = serd_file_uri_parse(uri, NULL);

	if (filename) {
		FILE* const stream = serd_fopen((const char*)filename, "r");
		if (stream) {
			result = serd_reader_read_file_handle(reader, stream, filename);
			fclose(stream);
		} else {
			result = SERD_ERR_UNKNOWN;
		}

		// The parsed path is owned by this function and released here
		free(filename);
	}

	return result;
}
int main(int argc, char** argv) { if (argc < 2) { return print_usage(argv[0], true); } FILE* in_fd = NULL; SerdSyntax input_syntax = SERD_TURTLE; SerdSyntax output_syntax = SERD_NTRIPLES; bool from_file = true; const uint8_t* in_name = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { in_name = (const uint8_t*)"(stdin)"; in_fd = stdin; break; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'v') { return print_version(); } else if (argv[a][1] == 's') { in_name = (const uint8_t*)"(string)"; from_file = false; ++a; break; } else if (argv[a][1] == 'i') { if (++a == argc) { fprintf(stderr, "Missing value for -i\n"); return 1; } if (!set_syntax(&input_syntax, argv[a])) { return 1; } } else if (argv[a][1] == 'o') { if (++a == argc) { fprintf(stderr, "Missing value for -o\n"); return 1; } if (!set_syntax(&output_syntax, argv[a])) { return 1; } } else { fprintf(stderr, "Unknown option `%s'\n", argv[a]); return print_usage(argv[0], true); } } if (a == argc) { fprintf(stderr, "Missing input\n"); return 1; } const uint8_t* input = (const uint8_t*)argv[a++]; uint8_t* in_path = NULL; if (from_file) { in_name = in_name ? 
in_name : input; if (!in_fd) { in_path = absolute_path(serd_uri_to_path(in_name)); if (!in_path || !(in_fd = fopen((const char*)in_path, "rb"))) { return 1; } } } SerdURI base_uri = SERD_URI_NULL; SerdNode base_uri_node = SERD_NODE_NULL; if (a < argc) { // Base URI given on command line base_uri_node = serd_node_new_uri_from_string( (const uint8_t*)argv[a], NULL, &base_uri); } else if (from_file) { // Use input file URI base_uri_node = serd_node_new_file_uri(in_path, NULL, &base_uri, false); } if (!base_uri_node.buf) { fprintf(stderr, "Missing base URI\n"); return 1; } SordWorld* world = sord_world_new(); SordModel* sord = sord_new(world, SORD_SPO|SORD_OPS, false); SerdEnv* env = serd_env_new(&base_uri_node); SerdReader* reader = sord_new_reader(sord, env, input_syntax, NULL); const SerdStatus status = (from_file) ? serd_reader_read_file_handle(reader, in_fd, in_name) : serd_reader_read_string(reader, input); serd_reader_free(reader); fprintf(stderr, "Loaded %lu statements\n", (unsigned long)sord_num_quads(sord)); SerdEnv* write_env = serd_env_new(&base_uri_node); int output_style = SERD_STYLE_RESOLVED; if (output_syntax == SERD_NTRIPLES) { output_style |= SERD_STYLE_ASCII; } else { output_style |= SERD_STYLE_CURIED | SERD_STYLE_ABBREVIATED; } SerdWriter* writer = serd_writer_new( output_syntax, (SerdStyle)output_style, write_env, &base_uri, serd_file_sink, stdout); // Write @prefix directives serd_env_foreach(env, (SerdPrefixSink)serd_writer_set_prefix, writer); // Write statements sord_write(sord, writer, NULL); serd_writer_finish(writer); serd_writer_free(writer); serd_env_free(env); serd_env_free(write_env); serd_node_free(&base_uri_node); sord_free(sord); sord_world_free(world); return (status > SERD_FAILURE) ? 1 : 0; }
int main(int argc, char** argv) { if (argc < 2) { return print_usage(argv[0], true); } FILE* in_fd = NULL; SerdSyntax input_syntax = SERD_TURTLE; SerdSyntax output_syntax = SERD_NTRIPLES; bool from_file = true; bool bulk_read = true; bool bulk_write = false; bool full_uris = false; bool lax = false; bool quiet = false; const uint8_t* in_name = NULL; const uint8_t* add_prefix = NULL; const uint8_t* chop_prefix = NULL; const uint8_t* root_uri = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { in_name = (const uint8_t*)"(stdin)"; in_fd = stdin; break; } else if (argv[a][1] == 'b') { bulk_write = true; } else if (argv[a][1] == 'e') { bulk_read = false; } else if (argv[a][1] == 'f') { full_uris = true; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'l') { lax = true; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { return print_version(); } else if (argv[a][1] == 's') { in_name = (const uint8_t*)"(string)"; from_file = false; ++a; break; } else if (argv[a][1] == 'i') { if (++a == argc) { return missing_arg(argv[0], 'i'); } else if (!set_syntax(&input_syntax, argv[a])) { return print_usage(argv[0], true); } } else if (argv[a][1] == 'o') { if (++a == argc) { return missing_arg(argv[0], 'o'); } else if (!set_syntax(&output_syntax, argv[a])) { return print_usage(argv[0], true); } } else if (argv[a][1] == 'p') { if (++a == argc) { return missing_arg(argv[0], 'p'); } add_prefix = (const uint8_t*)argv[a]; } else if (argv[a][1] == 'c') { if (++a == argc) { return missing_arg(argv[0], 'c'); } chop_prefix = (const uint8_t*)argv[a]; } else if (argv[a][1] == 'r') { if (++a == argc) { return missing_arg(argv[0], 'r'); } root_uri = (const uint8_t*)argv[a]; } else { SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); return print_usage(argv[0], true); } } if (a == argc) { SERDI_ERROR("missing input\n"); return 1; } const uint8_t* input = (const uint8_t*)argv[a++]; if 
(from_file) { in_name = in_name ? in_name : input; if (!in_fd) { input = serd_uri_to_path(in_name); if (!input || !(in_fd = serd_fopen((const char*)input, "r"))) { return 1; } } } SerdURI base_uri = SERD_URI_NULL; SerdNode base = SERD_NODE_NULL; if (a < argc) { // Base URI given on command line base = serd_node_new_uri_from_string( (const uint8_t*)argv[a], NULL, &base_uri); } else if (from_file && in_fd != stdin) { // Use input file URI base = serd_node_new_file_uri(input, NULL, &base_uri, false); } FILE* out_fd = stdout; SerdEnv* env = serd_env_new(&base); int output_style = 0; if (output_syntax == SERD_NTRIPLES) { output_style |= SERD_STYLE_ASCII; } else { output_style |= SERD_STYLE_ABBREVIATED; if (!full_uris) { output_style |= SERD_STYLE_CURIED; } } if (input_syntax != SERD_NTRIPLES || (output_style & SERD_STYLE_CURIED)) { // Base URI may change and/or we're abbreviating URIs, so must resolve output_style |= SERD_STYLE_RESOLVED; // Base may chan } if (bulk_write) { output_style |= SERD_STYLE_BULK; } SerdWriter* writer = serd_writer_new( output_syntax, (SerdStyle)output_style, env, &base_uri, serd_file_sink, out_fd); SerdReader* reader = serd_reader_new( input_syntax, writer, NULL, (SerdBaseSink)serd_writer_set_base_uri, (SerdPrefixSink)serd_writer_set_prefix, (SerdStatementSink)serd_writer_write_statement, (SerdEndSink)serd_writer_end_anon); serd_reader_set_strict(reader, !lax); if (quiet) { serd_reader_set_error_sink(reader, quiet_error_sink, NULL); serd_writer_set_error_sink(writer, quiet_error_sink, NULL); } SerdNode root = serd_node_from_string(SERD_URI, root_uri); serd_writer_set_root_uri(writer, &root); serd_writer_chop_blank_prefix(writer, chop_prefix); serd_reader_add_blank_prefix(reader, add_prefix); SerdStatus status = SERD_SUCCESS; if (!from_file) { status = serd_reader_read_string(reader, input); } else if (bulk_read) { status = serd_reader_read_file_handle(reader, in_fd, in_name); } else { status = serd_reader_start_stream(reader, in_fd, in_name, 
false); while (!status) { status = serd_reader_read_chunk(reader); } serd_reader_end_stream(reader); } serd_reader_free(reader); if (from_file) { fclose(in_fd); } serd_writer_finish(writer); serd_writer_free(writer); serd_env_free(env); serd_node_free(&base); return (status > SERD_FAILURE) ? 1 : 0; }
int main(int argc, char** argv) { if (argc < 2) { return print_usage(argv[0], true); } FILE* in_fd = NULL; SerdSyntax input_syntax = SERD_TURTLE; SerdSyntax output_syntax = SERD_NTRIPLES; bool from_file = true; bool bulk_write = false; bool full_uris = false; const uint8_t* in_name = NULL; const uint8_t* add_prefix = NULL; const uint8_t* chop_prefix = NULL; const uint8_t* root_uri = NULL; int a = 1; for (; a < argc && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { in_name = (const uint8_t*)"(stdin)"; in_fd = stdin; break; } else if (argv[a][1] == 'b') { bulk_write = true; } else if (argv[a][1] == 'f') { full_uris = true; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); } else if (argv[a][1] == 'v') { return print_version(); } else if (argv[a][1] == 's') { in_name = (const uint8_t*)"(string)"; from_file = false; ++a; break; } else if (argv[a][1] == 'i') { if (++a == argc || !set_syntax(&input_syntax, argv[a])) { return bad_arg(argv[0], 'i'); } } else if (argv[a][1] == 'o') { if (++a == argc || !set_syntax(&output_syntax, argv[a])) { return bad_arg(argv[0], 'o'); } } else if (argv[a][1] == 'p') { if (++a == argc) { return bad_arg(argv[0], 'p'); } add_prefix = (const uint8_t*)argv[a]; } else if (argv[a][1] == 'c') { if (++a == argc) { return bad_arg(argv[0], 'c'); } chop_prefix = (const uint8_t*)argv[a]; } else if (argv[a][1] == 'r') { if (++a == argc) { return bad_arg(argv[0], 'r'); } root_uri = (const uint8_t*)argv[a]; } else { fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]); return print_usage(argv[0], true); } } if (a == argc) { fprintf(stderr, "%s: Missing input\n", argv[0]); return 1; } const uint8_t* input = (const uint8_t*)argv[a++]; if (from_file) { in_name = in_name ? 
in_name : input; if (!in_fd) { input = serd_uri_to_path(in_name); if (!input || !(in_fd = serd_fopen((const char*)input, "r"))) { return 1; } } } SerdURI base_uri = SERD_URI_NULL; SerdNode base = SERD_NODE_NULL; if (a < argc) { // Base URI given on command line base = serd_node_new_uri_from_string( (const uint8_t*)argv[a], NULL, &base_uri); } else if (from_file && in_fd != stdin) { // Use input file URI base = serd_node_new_file_uri(input, NULL, &base_uri, false); } FILE* out_fd = stdout; SerdEnv* env = serd_env_new(&base); int output_style = 0; if (output_syntax == SERD_NTRIPLES) { output_style |= SERD_STYLE_ASCII; } else { output_style |= SERD_STYLE_ABBREVIATED; if (!full_uris) { output_style |= SERD_STYLE_CURIED; } } if (input_syntax != SERD_NTRIPLES // Base URI may change (@base) || (output_syntax == SERD_TURTLE)) { output_style |= SERD_STYLE_RESOLVED; } if (bulk_write) { output_style |= SERD_STYLE_BULK; } SerdWriter* writer = serd_writer_new( output_syntax, (SerdStyle)output_style, env, &base_uri, serd_file_sink, out_fd); SerdReader* reader = serd_reader_new( input_syntax, writer, NULL, (SerdBaseSink)serd_writer_set_base_uri, (SerdPrefixSink)serd_writer_set_prefix, (SerdStatementSink)serd_writer_write_statement, (SerdEndSink)serd_writer_end_anon); SerdNode root = serd_node_from_string(SERD_URI, root_uri); serd_writer_set_root_uri(writer, &root); serd_writer_chop_blank_prefix(writer, chop_prefix); serd_reader_add_blank_prefix(reader, add_prefix); const SerdStatus status = (from_file) ? serd_reader_read_file_handle(reader, in_fd, in_name) : serd_reader_read_string(reader, input); serd_reader_free(reader); if (from_file) { fclose(in_fd); } serd_writer_finish(writer); serd_writer_free(writer); serd_env_free(env); serd_node_free(&base); return (status > SERD_FAILURE) ? 1 : 0; }