/* turtle_read_name(+C0, +Stream, -C, -Name)
 *
 * Read a name from Stream.  C0 is the already-read first character
 * code and must satisfy wcis_name_start_char(); otherwise the call
 * fails without touching the stream.  Characters are then read from
 * the stream for as long as wcis_name_char() accepts them.  C is
 * unified with the first code that was not accepted and Name with an
 * atom built from the collected codes.
 *
 * Raises a type error if C0 is not an integer.
 */
static foreign_t
turtle_read_name(term_t C0, term_t Stream, term_t C, term_t Name)
{ int first;
  int next;
  int rc;
  charbuf name;
  IOSTREAM *in;

  if ( !PL_get_integer(C0, &first) )
    return type_error(C0, "code");
  if ( !wcis_name_start_char(first) )
    return FALSE;
  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&name);
  add_charbuf(&name, first);

  /* Collect until the first code that cannot be part of a name */
  for(next = Sgetcode(in); wcis_name_char(next); next = Sgetcode(in))
  { add_charbuf(&name, next);
  }

  rc = ( PL_unify_integer(C, next) &&
	 PL_unify_wchars(Name, PL_ATOM, name.here-name.base, name.base) );

  free_charbuf(&name);
  PL_release_stream(in);

  return rc;
}
/* uri_query_components(?String, ?List)
 *
 * Two directions:
 *
 *   - If String is text (atom, string or code list) it is handed to
 *     unify_query_string_components() together with List.
 *   - Otherwise, if List is a list, each element must be a compound
 *     matching FUNCTOR_equal2 or FUNCTOR_pair2 (name in argument 1,
 *     value in argument 2) or any arity-1 compound (functor name is
 *     the name, argument 1 the value).  The elements are encoded as
 *     name=value, joined by '&', and the result is unified with
 *     String as an atom.  Any other element raises a "name_value"
 *     type error.
 *   - If neither applies, PL_get_wchars() is called again with
 *     CVT_EXCEPTION so that it reports the error on String.
 */
static foreign_t
uri_query_components(term_t string, term_t list)
{ pl_wchar_t *text;
  size_t textlen;
  term_t rest, item, pair;
  charbuf out;
  int rc;

  if ( PL_get_wchars(string, &textlen, &text,
		     CVT_ATOM|CVT_STRING|CVT_LIST) )
    return unify_query_string_components(list, textlen, text);

  if ( !PL_is_list(list) )
    return PL_get_wchars(string, &textlen, &text,
			 CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);

  rest = PL_copy_term_ref(list);
  item = PL_new_term_ref();
  pair = PL_new_term_refs(2);		/* pair+0: name, pair+1: value */

  fill_flags();
  init_charbuf(&out);

  while ( PL_get_list(rest, item, rest) )
  { atom_t fname;
    int arity;

    if ( PL_is_functor(item, FUNCTOR_equal2) ||
	 PL_is_functor(item, FUNCTOR_pair2) )
    { _PL_get_arg(1, item, pair+0);
      _PL_get_arg(2, item, pair+1);
    } else if ( PL_get_name_arity(item, &fname, &arity) && arity == 1 )
    { PL_put_atom(pair+0, fname);
      _PL_get_arg(1, item, pair+1);
    } else
    { free_charbuf(&out);
      return type_error("name_value", item);
    }

    if ( out.here != out.base )		/* not the first pair */
      add_charbuf(&out, '&');

    if ( !add_encoded_term_charbuf(&out, pair+0, ESC_QNAME) )
      goto failed;
    add_charbuf(&out, '=');
    if ( !add_encoded_term_charbuf(&out, pair+1, ESC_QVALUE) )
      goto failed;
  }

  rc = PL_unify_wchars(string, PL_ATOM, out.here-out.base, out.base);
  free_charbuf(&out);
  return rc;

failed:
  free_charbuf(&out);
  return FALSE;
}
/* uri_authority_components(?Authority, ?Components)
 *
 * If Authority is text it is passed to
 * unify_uri_authority_components().  Otherwise, if Components is a
 * FUNCTOR_uri_authority4 term, the authority text is assembled from
 * its arguments as
 *
 *     [Arg1[:Arg2]@][Arg3][:Arg4]
 *
 * and unified with Authority as an atom.  Argument 2 is only
 * consulted when argument 1 is present.  Argument 4 is additionally
 * fetched with CVT_INTEGER.
 *
 * get_text_arg() returning TRUE means the argument is used; -1 makes
 * the predicate fail; any other result skips that part.
 *
 * If neither direction applies, PL_get_wchars() is repeated with
 * CVT_EXCEPTION to report the error on Authority.
 */
static foreign_t
uri_authority_components(term_t Authority, term_t components)
{ pl_wchar_t *text;
  size_t n;
  charbuf buf;
  int rc;

  if ( PL_get_wchars(Authority, &n, &text, CVT_ATOM|CVT_STRING|CVT_LIST) )
    return unify_uri_authority_components(components, n, text);

  if ( !PL_is_functor(components, FUNCTOR_uri_authority4) )
    return PL_get_wchars(Authority, &n, &text,
			 CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);

  init_charbuf(&buf);

  /* Arg1[:Arg2]@ */
  rc = get_text_arg(components, 1, &n, &text, TXT_EX_TEXT);
  if ( rc == TRUE )
  { add_nchars_charbuf(&buf, n, text);

    rc = get_text_arg(components, 2, &n, &text, TXT_EX_TEXT);
    if ( rc == TRUE )
    { add_charbuf(&buf, ':');
      add_nchars_charbuf(&buf, n, text);
    } else if ( rc == -1 )
    { goto failed;
    }
    add_charbuf(&buf, '@');
  } else if ( rc == -1 )
  { goto failed;
  }

  /* Arg3 */
  rc = get_text_arg(components, 3, &n, &text, TXT_EX_TEXT);
  if ( rc == TRUE )
  { add_nchars_charbuf(&buf, n, text);
  } else if ( rc == -1 )
  { goto failed;
  }

  /* :Arg4 */
  rc = get_text_arg(components, 4, &n, &text, TXT_EX_TEXT|CVT_INTEGER);
  if ( rc == TRUE )
  { add_charbuf(&buf, ':');
    add_nchars_charbuf(&buf, n, text);
  } else if ( rc == -1 )
  { goto failed;
  }

  rc = PL_unify_wchars(Authority, PL_ATOM, buf.here-buf.base, buf.base);
  free_charbuf(&buf);
  return rc;

failed:
  free_charbuf(&buf);
  return FALSE;
}
/* Unify t with an atom holding the text of range r.  If
 * range_has_escape() reports that the range needs decoding under
 * `flags`, the decoded text is first built in a temporary buffer;
 * otherwise the range is unified directly through unify_range().
 * Returns the result of the unification.
 */
static int
unify_decoded_atom(term_t t, range *r, int flags)
{ charbuf decoded;
  int rc;

  if ( !range_has_escape(r, flags) )
    return unify_range(t, r);

  init_charbuf(&decoded);
  add_decoded_range_charbuf(&decoded, r, flags);
  rc = PL_unify_wchars(t, PL_ATOM,
		       decoded.here - decoded.base, decoded.base);
  free_charbuf(&decoded);

  return rc;
}
/* turtle_read_relative_uri(+C0, +Stream, -C, -Value)
 *
 * C0 must be the code of '<' (the call fails otherwise).  Codes are
 * read from Stream up to the closing '>'.  Value is unified with an
 * atom holding the collected text and C with the code read after
 * the '>'.
 *
 * A backslash followed by '>' inserts a literal '>'; any other
 * backslash sequence is delegated to string_escape(), and the
 * predicate fails if that fails.  Reading -1 from the stream before
 * the closing '>' yields syntax_error("eof_in_uri", ...).
 *
 * Raises a type error if C0 is not an integer.
 */
static foreign_t
turtle_read_relative_uri(term_t C0, term_t Stream, term_t C, term_t Value)
{ int c;
  charbuf uri;
  IOSTREAM *in;

  if ( !PL_get_integer(C0, &c) )
    return type_error(C0, "code");
  if ( c != '<' )
    return FALSE;
  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&uri);

  for(c = Sgetcode(in); ; c = Sgetcode(in))
  { switch(c)
    { case '>':				/* end of the URI */
      { int rc;

	c = Sgetcode(in);
	rc = ( PL_unify_integer(C, c) &&
	       PL_unify_wchars(Value, PL_ATOM,
			       uri.here-uri.base, uri.base) );
	PL_release_stream(in);
	free_charbuf(&uri);
	return rc;
      }
      case '\\':
      { int esc;

	c = Sgetcode(in);
	if ( c == '>' )			/* \> is a literal '>' */
	{ add_charbuf(&uri, c);
	} else if ( string_escape(in, c, &esc) )
	{ add_charbuf(&uri, esc);
	} else
	{ free_charbuf(&uri);
	  PL_release_stream(in);
	  return FALSE;
	}
	break;
      }
      case -1:
	free_charbuf(&uri);
	PL_release_stream(in);
	/* NOTE(review): `in` is handed to syntax_error() after it has
	 * been released -- confirm that this is safe.
	 */
	return syntax_error("eof_in_uri", in);
      default:
	add_charbuf(&uri, c);
	break;
    }
  }
}
/* uri_encoded(+What, ?Value, ?Encoded)
 *
 * What selects the escape flags: ATOM_query_value -> ESC_QVALUE,
 * ATOM_fragment -> ESC_FRAGMENT, ATOM_path -> ESC_PATH.  A non-atom
 * raises a type error, any other atom a "uri_component" domain
 * error.
 *
 * If Value is not a variable it is encoded under these flags and the
 * result is unified with Encoded as an atom.  If Value is a
 * variable, Encoded must be an atom or string (an exception is
 * requested from PL_get_wchars() otherwise); its text is decoded and
 * unified with Value.
 */
static foreign_t
uri_encoded(term_t what, term_t qv, term_t enc)
{ atom_t component;
  int flags;

  if ( !PL_get_atom(what, &component) )
    return type_error("atom", what);

  if ( component == ATOM_query_value )
    flags = ESC_QVALUE;
  else if ( component == ATOM_fragment )
    flags = ESC_FRAGMENT;
  else if ( component == ATOM_path )
    flags = ESC_PATH;
  else
    return domain_error("uri_component", what);

  fill_flags();

  if ( PL_is_variable(qv) )		/* decode Encoded into Value */
  { pl_wchar_t *text;
    size_t n;
    range r;

    if ( !PL_get_wchars(enc, &n, &text,
			CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
      return FALSE;

    r.start = text;
    r.end   = text+n;
    return unify_decoded_atom(qv, &r, flags);
  } else				/* encode Value into Encoded */
  { charbuf out;
    int rc;

    init_charbuf(&out);
    if ( !add_encoded_term_charbuf(&out, qv, flags) )
    { free_charbuf(&out);
      return FALSE;
    }
    rc = PL_unify_wchars(enc, PL_ATOM, out.here-out.base, out.base);
    free_charbuf(&out);
    return rc;
  }
}
/* Shared implementation for normalising a URI.  The text of URI
 * (atom, string or code list; an exception is requested otherwise)
 * is parsed into component ranges, rewritten by
 * normalize_in_charbuf() and the result is unified with
 * CannonicalURI as an atom.  `iri` is passed through to
 * normalize_in_charbuf() unchanged.
 */
static foreign_t
normalized(term_t URI, term_t CannonicalURI, int iri)
{ pl_wchar_t *text;
  size_t n;
  uri_component_ranges parts;
  charbuf result;
  int rc;

  if ( !PL_get_wchars(URI, &n, &text,
		      CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION) )
    return FALSE;

  parse_uri(&parts, n, text);

  init_charbuf(&result);
  normalize_in_charbuf(&result, &parts, iri);
  rc = PL_unify_wchars(CannonicalURI, PL_ATOM,
		       result.here-result.base, result.base);
  free_charbuf(&result);

  return rc;
}
/* Append the normalised form of the URI described by `ranges` to cb.
 *
 *   - scheme and authority go through add_lwr_range_charbuf()
 *   - a non-empty path is first re-encoded into a scratch buffer and
 *     then passed through removed_dot_segments()
 *   - query and fragment go through add_range_charbuf()
 *
 * A component is emitted only if its range has a start pointer (for
 * the path: only if it is non-empty).  `iri` is forwarded to the
 * range helpers.  Always returns TRUE.
 */
static int
normalize_in_charbuf(charbuf *cb, uri_component_ranges *ranges, int iri)
{ fill_flags();

  if ( ranges->scheme.start )
  { add_lwr_range_charbuf(cb, &ranges->scheme, iri, ESC_SCHEME);
    add_charbuf(cb, ':');
  }

  if ( ranges->authority.start )
  { add_charbuf(cb, '/');
    add_charbuf(cb, '/');
    add_lwr_range_charbuf(cb, &ranges->authority, iri, ESC_AUTH);
  }

  if ( ranges->path.end > ranges->path.start )
  { charbuf encoded;			/* path after re-encoding */
    charbuf cleaned;			/* path without dot segments */
    size_t enclen;
    size_t outlen;

    init_charbuf(&encoded);
    add_range_charbuf(&encoded, &ranges->path, iri, ESC_PATH);
    enclen = encoded.here-encoded.base;

    /* output buffer is sized to the input length */
    init_charbuf_at_size(&cleaned, enclen);
    outlen = removed_dot_segments(enclen, encoded.base, cleaned.base);
    add_nchars_charbuf(cb, outlen, cleaned.base);

    free_charbuf(&cleaned);
    free_charbuf(&encoded);
  }

  if ( ranges->query.start )
  { add_charbuf(cb, '?');
    add_range_charbuf(cb, &ranges->query, iri, ESC_QUERY);
  }

  if ( ranges->fragment.start )
  { add_charbuf(cb, '#');
    /* NOTE(review): the fragment is written with ESC_QVALUE although
     * an ESC_FRAGMENT flag set exists -- confirm this is intended.
     */
    add_range_charbuf(cb, &ranges->fragment, iri, ESC_QVALUE);
  }

  return TRUE;
}
/* turtle_read_string(+C0, +Stream, -C, -Value)
 *
 * C0 must be the code of '"' (the call fails otherwise).  Reads a
 * short string ("...") or, if the stream continues with two more
 * quotes, a long string ("""...""").  Value is unified with an atom
 * holding the string content and C with the code read after the
 * closing quote(s).  The input `""` followed by a non-quote yields
 * the empty atom.
 *
 * Backslash sequences are decoded by string_escape(); the predicate
 * fails if that fails.  Reading -1 from the stream before the string
 * is closed yields syntax_error("eof_in_string", ...).
 *
 * Inside a long string, a run of quotes shorter than the terminator
 * is content.  The code that ended such a run is processed like any
 * other input code, so that a backslash there still starts an escape
 * and -1 is still reported as end of file rather than being copied
 * into the result.
 *
 * Raises a type error if C0 is not an integer.
 */
static foreign_t
turtle_read_string(term_t C0, term_t Stream, term_t C, term_t Value)
{ int c;
  charbuf b;
  IOSTREAM *in;
  int endlen = 1;			/* number of closing quotes */

  if ( !PL_get_integer(C0, &c) )
    return type_error(C0, "code");
  if ( c != '"' )
    return FALSE;
  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&b);

  c = Sgetcode(in);
  if ( c == '"' )
  { c = Sgetcode(in);
    if ( c == '"' )			/* """...""" */
    { endlen = 3;
      c = Sgetcode(in);
    } else				/* "" : empty string */
    { free_charbuf(&b);			/* same cleanup as other exits */
      PL_release_stream(in);
      return (PL_unify_integer(C, c) &&
	      PL_unify_atom(Value, ATOM_));
    }
  }

  for(;;)
  { if ( c == -1 )
    { free_charbuf(&b);
      PL_release_stream(in);
      /* NOTE(review): `in` is handed to syntax_error() after it has
       * been released -- confirm that this is safe.
       */
      return syntax_error("eof_in_string", in);
    } else if ( c == '"' )
    { int count = 1;

      while ( count < endlen )
      { if ( (c=Sgetcode(in)) == '"' )
	  count++;
	else
	  break;
      }

      if ( count == endlen )		/* closing quote(s) found */
      { int rc;

	c = Sgetcode(in);
	rc = (PL_unify_integer(C, c) &&
	      PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
	free_charbuf(&b);
	PL_release_stream(in);
	return rc;
      }

      /* Too few quotes: they are content.  c now holds the non-quote
       * code that ended the run; loop without reading so that it is
       * dispatched normally (escape, end of file or plain code).
       */
      while(count-- > 0)
	add_charbuf(&b, '"');
      continue;
    } else if ( c == '\\' )
    { int esc;

      c = Sgetcode(in);
      if ( !string_escape(in, c, &esc) )
      { free_charbuf(&b);
	PL_release_stream(in);
	return FALSE;
      }
      add_charbuf(&b, esc);
    } else
    { add_charbuf(&b, c);
    }

    c = Sgetcode(in);
  }
}
/* uri_components(?URI, ?Components)
 *
 * Parse direction: if URI is text (atom, string or code list) it is
 * split by parse_uri() and Components is unified with a
 * FUNCTOR_uri_components5 term whose arguments are the scheme,
 * authority, path, query and fragment ranges (as produced by
 * unify_range()).  A failing unify_range() makes the predicate fail
 * instead of being ignored.
 *
 * Build direction: if Components is a FUNCTOR_uri_components5 term,
 * the URI text is assembled as
 *
 *     [Scheme:][//Authority][Path][?Query][#Fragment]
 *
 * and unified with URI as an atom.  The query is only emitted when
 * it is non-empty.  get_text_arg() returning TRUE means the argument
 * is used; -1 makes the predicate fail; any other result skips that
 * component.
 *
 * If neither direction applies, PL_get_wchars() is repeated with
 * CVT_EXCEPTION to report the error on URI.
 */
static foreign_t
uri_components(term_t URI, term_t components)
{ pl_wchar_t *s;
  size_t len;

  if ( PL_get_wchars(URI, &len, &s, CVT_ATOM|CVT_STRING|CVT_LIST) )
  { uri_component_ranges ranges;
    term_t rt = PL_new_term_refs(6);	/* rt: result, rt+1..5: args */
    term_t av = rt+1;

    parse_uri(&ranges, len, s);

    return ( unify_range(av+0, &ranges.scheme) &&
	     unify_range(av+1, &ranges.authority) &&
	     unify_range(av+2, &ranges.path) &&
	     unify_range(av+3, &ranges.query) &&
	     unify_range(av+4, &ranges.fragment) &&
	     PL_cons_functor_v(rt, FUNCTOR_uri_components5, av) &&
	     PL_unify(components, rt) );
  } else if ( PL_is_functor(components, FUNCTOR_uri_components5) )
  { charbuf b;
    int rc;

    init_charbuf(&b);

					/* scheme */
    if ( (rc=get_text_arg(components, 1, &len, &s, TXT_EX_TEXT)) == TRUE )
    { add_nchars_charbuf(&b, len, s);
      add_charbuf(&b, ':');
    } else if ( rc == -1 )
    { goto failed;
    }
					/* authority */
    if ( (rc=get_text_arg(components, 2, &len, &s, TXT_EX_TEXT)) == TRUE )
    { add_charbuf(&b, '/');
      add_charbuf(&b, '/');
      add_nchars_charbuf(&b, len, s);
    } else if ( rc == -1 )
    { goto failed;
    }
					/* path */
    if ( (rc=get_text_arg(components, 3, &len, &s, TXT_EX_TEXT)) == TRUE )
    { add_nchars_charbuf(&b, len, s);
    } else if ( rc == -1 )
    { goto failed;
    }
					/* query: skipped when empty */
    if ( (rc=get_text_arg(components, 4, &len, &s, TXT_EX_TEXT)) == TRUE )
    { if ( len > 0 )
      { add_charbuf(&b, '?');
	add_nchars_charbuf(&b, len, s);
      }
    } else if ( rc == -1 )
    { goto failed;
    }
					/* fragment */
    if ( (rc=get_text_arg(components, 5, &len, &s, TXT_EX_TEXT)) == TRUE )
    { add_charbuf(&b, '#');
      add_nchars_charbuf(&b, len, s);
    } else if ( rc == -1 )
    { goto failed;
    }

    rc = PL_unify_wchars(URI, PL_ATOM, b.here-b.base, b.base);
    free_charbuf(&b);
    return rc;

  failed:
    free_charbuf(&b);
    return FALSE;
  } else				/* generate an error */
  { return PL_get_wchars(URI, &len, &s,
			 CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);
  }
}
/* Shared implementation for resolving the reference Rel against Base
 * and unifying the result with URI as an atom.
 *
 * Rel must be text (atom, string or code list; an exception is
 * requested otherwise).  If Rel has a scheme it is used as is.
 * Otherwise the components of the base (obtained via base_ranges())
 * and of Rel are combined:
 *
 *   - Rel has an authority: authority, path and query come from Rel.
 *   - Rel has an empty path: the base path is used, and the base
 *     query unless Rel has its own.
 *   - Rel's path starts with '/': it replaces the base path.
 *   - Otherwise the paths are merged in the scratch buffer `pb`:
 *     "/" + Rel's path when the base has an authority and an empty
 *     path, else the base path without its last segment followed by
 *     Rel's path.
 *
 * Scheme comes from the base, fragment from Rel.
 *
 * If `normalize` is set the result is written through
 * normalize_in_charbuf() (which receives `iri`); otherwise only dot
 * segments are removed from the path and the components are written
 * with ranges_in_charbuf().
 *
 * All scratch buffers are released on every exit.
 */
static foreign_t
resolve(term_t Rel, term_t Base, term_t URI, int normalize, int iri)
{ pl_wchar_t *s;
  size_t slen;
  uri_component_ranges s_ranges, t_ranges;
  int rc;
  size_t len;
  charbuf out, pb, path;

  init_charbuf(&pb);			/* path-buffer */

  if ( !PL_get_wchars(Rel, &slen, &s,
		      CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION) )
  { free_charbuf(&pb);
    return FALSE;
  }

  parse_uri(&s_ranges, slen, s);
  if ( s_ranges.scheme.start )
  { t_ranges = s_ranges;
  } else
  { const uri_component_ranges *b_ranges;

    if ( !(b_ranges = base_ranges(Base)) )
    { free_charbuf(&pb);
      return FALSE;
    }

    memset(&t_ranges, 0, sizeof(t_ranges));
    if ( s_ranges.authority.start )
    { t_ranges.authority = s_ranges.authority;
      t_ranges.path      = s_ranges.path;
      t_ranges.query     = s_ranges.query;
    } else
    { if ( s_ranges.path.start == s_ranges.path.end )
      { t_ranges.path = b_ranges->path;
	if ( s_ranges.query.start )
	  t_ranges.query = s_ranges.query;
	else
	  t_ranges.query = b_ranges->query;
      } else
      { if ( s_ranges.path.start[0] == '/' )
	{ t_ranges.path = s_ranges.path;
	} else
	{ if ( b_ranges->authority.start &&
	       b_ranges->path.start == b_ranges->path.end )
	  { add_charbuf(&pb, '/');
	    add_verb_range_charbuf(&pb, &s_ranges.path);
	  } else
	  { range base_path = b_ranges->path;

	    base_path.end = remove_last_segment(base_path.start,
						base_path.end);
	    add_verb_range_charbuf(&pb, &base_path);
	    add_verb_range_charbuf(&pb, &s_ranges.path);
	  }
	  /* Both merge variants build the path in pb; without this
	   * assignment the "/"+path variant would leave the target
	   * path empty.
	   */
	  t_ranges.path.start = pb.base;
	  t_ranges.path.end   = pb.here;
	}
	t_ranges.query = s_ranges.query;
      }
      t_ranges.authority = b_ranges->authority;
    }
    t_ranges.scheme   = b_ranges->scheme;
    t_ranges.fragment = s_ranges.fragment;
  }

  init_charbuf(&out);			/* output buffer */

  if ( normalize )
  { normalize_in_charbuf(&out, &t_ranges, iri);
  } else
  { init_charbuf_at_size(&path, t_ranges.path.end - t_ranges.path.start);
    len = removed_dot_segments(t_ranges.path.end - t_ranges.path.start,
			       t_ranges.path.start,
			       path.base);
    t_ranges.path.start = path.base;
    t_ranges.path.end   = path.base+len;
    ranges_in_charbuf(&out, &t_ranges);
    free_charbuf(&path);		/* only initialised on this branch */
  }

  /* t_ranges.path may point into pb until the output is written */
  free_charbuf(&pb);

  rc = PL_unify_wchars(URI, PL_ATOM, out.here-out.base, out.base);
  free_charbuf(&out);

  return rc;
}