/*
 * turtle_read_name(C0, Stream, C, Name)
 *
 * C0 is the code of the first character of the name; the remaining codes
 * are read from Stream for as long as wcis_name_char() accepts them.
 * C is unified with the first code that is not part of the name and Name
 * with an atom built from the collected codes.
 *
 * Raises a type error if C0 is not an integer.  Fails if C0 is rejected by
 * wcis_name_start_char() or if no stream handle can be obtained.
 */
static foreign_t
turtle_read_name(term_t C0, term_t Stream, term_t C, term_t Name)
{
    int first;
    int next;
    int rc;
    charbuf name;
    IOSTREAM *in;

    if (!PL_get_integer(C0, &first))
        return type_error(C0, "code");
    if (!wcis_name_start_char(first))
        return FALSE;
    if (!PL_get_stream_handle(Stream, &in))
        return FALSE;

    init_charbuf(&name);
    add_charbuf(&name, first);

    /* Collect codes until one is seen that cannot be part of a name. */
    for (;;) {
        next = Sgetcode(in);
        if (!wcis_name_char(next))
            break;
        add_charbuf(&name, next);
    }

    /* `next` is the look-ahead code handed back to the caller through C. */
    rc = (PL_unify_integer(C, next) &&
          PL_unify_wchars(Name, PL_ATOM, name.here - name.base, name.base));

    free_charbuf(&name);
    PL_release_stream(in);

    return rc;
}
/*
 * uri_query_components(string, list)
 *
 * Two directions:
 *
 *  - If `string` can be read as text, the work is delegated to
 *    unify_query_string_components(), which unifies `list`.
 *  - Otherwise, if `list` is a list, each element is turned into an encoded
 *    "name=value" pair, pairs are joined with '&', and `string` is unified
 *    with the resulting atom.  An element must match FUNCTOR_equal2 or
 *    FUNCTOR_pair2 (presumably Name=Value and Name-Value), or be a term of
 *    arity 1, in which case the functor name is the name and the argument
 *    the value.  Anything else raises a type error.
 *  - If neither holds, PL_get_wchars() is called again with CVT_EXCEPTION so
 *    that it reports the error on `string`.
 */
static foreign_t
uri_query_components(term_t string, term_t list)
{
    pl_wchar_t *s;
    size_t len;
    term_t tail;
    term_t head;
    term_t nv;
    charbuf out;
    int rc;

    if (PL_get_wchars(string, &len, &s, CVT_ATOM|CVT_STRING|CVT_LIST))
        return unify_query_string_components(list, len, s);

    if (!PL_is_list(list))
        return PL_get_wchars(string, &len, &s,
                             CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);

    tail = PL_copy_term_ref(list);
    head = PL_new_term_ref();
    nv   = PL_new_term_refs(2);        /* nv+0: name, nv+1: value */

    fill_flags();
    init_charbuf(&out);

    while (PL_get_list(tail, head, tail)) {
        atom_t fname;
        int arity;

        if (PL_is_functor(head, FUNCTOR_equal2) ||
            PL_is_functor(head, FUNCTOR_pair2)) {
            _PL_get_arg(1, head, nv+0);
            _PL_get_arg(2, head, nv+1);
        } else if (PL_get_name_arity(head, &fname, &arity) && arity == 1) {
            PL_put_atom(nv+0, fname);
            _PL_get_arg(1, head, nv+1);
        } else {
            free_charbuf(&out);
            /* NOTE(review): other call sites visible in this file pass the
             * term first and the type name second; confirm the argument
             * order against the prototype of type_error(). */
            return type_error("name_value", head);
        }

        /* Separate this pair from the previous one, if any. */
        if (out.here != out.base)
            add_charbuf(&out, '&');

        if (!add_encoded_term_charbuf(&out, nv+0, ESC_QNAME))
            goto failed;
        add_charbuf(&out, '=');
        if (!add_encoded_term_charbuf(&out, nv+1, ESC_QVALUE))
            goto failed;
    }

    rc = PL_unify_wchars(string, PL_ATOM, out.here - out.base, out.base);
    free_charbuf(&out);
    return rc;

failed:
    free_charbuf(&out);
    return FALSE;
}
/*
 * Append code `c` to `cb`.  If iri_no_escape() accepts the code for the
 * given `flags` it is appended as is; otherwise it is appended as '%'
 * followed by two hexdigit() characters for its high and low nibble.
 * A code that needs escaping is asserted to be below 128.
 *
 * Always returns TRUE.
 */
static int
iri_add_encoded_charbuf(charbuf *cb, int c, int flags)
{
    if (!iri_no_escape(c, flags)) {
        assert(c < 128);
        add_charbuf(cb, '%');
        add_charbuf(cb, hexdigit(c>>4));
        add_charbuf(cb, hexdigit(c&0xf));
        return TRUE;
    }

    add_charbuf(cb, c);
    return TRUE;
}
/*
 * uri_authority_components(Authority, components)
 *
 * Two directions:
 *
 *  - If Authority can be read as text, the work is delegated to
 *    unify_uri_authority_components().
 *  - Otherwise, if `components` matches FUNCTOR_uri_authority4, the text is
 *    assembled from its four arguments and Authority is unified with the
 *    resulting atom:
 *
 *        [arg1 [':' arg2] '@'] [arg3] [':' arg4]
 *
 *    Argument 2 is only consulted when argument 1 yields text.  Argument 4
 *    is additionally fetched with CVT_INTEGER.
 *  - If neither holds, PL_get_wchars() is called again with CVT_EXCEPTION so
 *    that it reports the error on Authority.
 *
 * get_text_arg() results are used as follows: TRUE means text was obtained,
 * -1 aborts the predicate with failure, any other value skips the part.
 */
static foreign_t
uri_authority_components(term_t Authority, term_t components)
{
    pl_wchar_t *s;
    size_t len;
    charbuf b;
    int rc;

    if (PL_get_wchars(Authority, &len, &s, CVT_ATOM|CVT_STRING|CVT_LIST))
        return unify_uri_authority_components(components, len, s);

    if (!PL_is_functor(components, FUNCTOR_uri_authority4))
        return PL_get_wchars(Authority, &len, &s,
                             CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);

    init_charbuf(&b);

    /* argument 1, optionally followed by ':' argument 2, then '@' */
    rc = get_text_arg(components, 1, &len, &s, TXT_EX_TEXT);
    if (rc == -1)
        goto error;
    if (rc == TRUE) {
        add_nchars_charbuf(&b, len, s);

        rc = get_text_arg(components, 2, &len, &s, TXT_EX_TEXT);
        if (rc == -1)
            goto error;
        if (rc == TRUE) {
            add_charbuf(&b, ':');
            add_nchars_charbuf(&b, len, s);
        }
        add_charbuf(&b, '@');
    }

    /* argument 3 */
    rc = get_text_arg(components, 3, &len, &s, TXT_EX_TEXT);
    if (rc == -1)
        goto error;
    if (rc == TRUE)
        add_nchars_charbuf(&b, len, s);

    /* ':' argument 4 */
    rc = get_text_arg(components, 4, &len, &s, TXT_EX_TEXT|CVT_INTEGER);
    if (rc == -1)
        goto error;
    if (rc == TRUE) {
        add_charbuf(&b, ':');
        add_nchars_charbuf(&b, len, s);
    }

    rc = PL_unify_wchars(Authority, PL_ATOM, b.here - b.base, b.base);
    free_charbuf(&b);
    return rc;

error:
    free_charbuf(&b);
    return FALSE;
}
/*
 * turtle_read_relative_uri(C0, Stream, C, Value)
 *
 * C0 must be the code of '<'.  Codes are read from Stream up to the closing
 * '>'.  A backslash followed by '>' yields a literal '>'; any other code
 * after a backslash is handed to string_escape().  On success C is unified
 * with the code following the closing '>' and Value with an atom holding
 * the collected text.
 *
 * Raises a type error if C0 is not an integer and a syntax error
 * ("eof_in_uri") if -1 is read before the closing '>'.  Fails if C0 is not
 * '<', if no stream handle can be obtained, or if string_escape() fails.
 */
static foreign_t
turtle_read_relative_uri(term_t C0, term_t Stream, term_t C, term_t Value)
{
    int c;
    int rc;
    charbuf b;
    IOSTREAM *in;

    if (!PL_get_integer(C0, &c))
        return type_error(C0, "code");
    if (c != '<')
        return FALSE;
    if (!PL_get_stream_handle(Stream, &in))
        return FALSE;

    init_charbuf(&b);

    for (c = Sgetcode(in); ; c = Sgetcode(in)) {
        if (c == '>') {
            c = Sgetcode(in);          /* look-ahead returned through C */
            rc = (PL_unify_integer(C, c) &&
                  PL_unify_wchars(Value, PL_ATOM, b.here - b.base, b.base));
            goto out;
        }

        if (c == -1) {
            /* Build the error while the stream handle is still held; the
             * handle must not be used once it has been released. */
            rc = syntax_error("eof_in_uri", in);
            goto out;
        }

        if (c == '\\') {
            int esc;

            c = Sgetcode(in);
            if (c == '>') {
                add_charbuf(&b, c);
            } else if (string_escape(in, c, &esc)) {
                add_charbuf(&b, esc);
            } else {
                rc = FALSE;
                goto out;
            }
        } else {
            add_charbuf(&b, c);
        }
    }

out:
    /* Single exit: every path drops the buffer and releases the stream. */
    free_charbuf(&b);
    PL_release_stream(in);
    return rc;
}
/*
 * Append the text in range `r` to `cb`, undoing %-encoding.
 *
 * At a '%', get_encoded_utf8() is tried first, then hex() on the two codes
 * that follow; if both fail the '%' is copied literally.  A '+' becomes a
 * space only when `flags` equals ESC_QVALUE.  Every other code is copied
 * unchanged.
 *
 * Always returns TRUE.
 */
static int
add_decoded_range_charbuf(charbuf *cb, const range *r, int flags)
{
    const pl_wchar_t *p = r->start;

    while (p < r->end) {
        const pl_wchar_t *after;
        int code;

        switch (*p) {
        case '%':
            if ( (after = get_encoded_utf8(p, &code)) ) {
                p = after;
            } else if (hex(p+1, 2, &code)) {
                p += 3;                /* '%' plus two hex digits */
            } else {
                code = *p++;           /* not an escape: keep the '%' */
            }
            break;
        case '+':
            if (flags == ESC_QVALUE) {
                p++;
                code = ' ';
                break;
            }
            /* FALLTHROUGH */
        default:
            code = *p++;
            break;
        }

        add_charbuf(cb, code);
    }

    return TRUE;
}
/*
 * Append code `c` to `cb`.  If no_escape() accepts the code for the given
 * `flags` it is appended as is.  Otherwise the code is written with
 * utf8_put_char() into a scratch buffer and each resulting byte is appended
 * as '%' followed by two hexdigit() characters.
 *
 * Always returns TRUE.
 */
static int
add_encoded_charbuf(charbuf *cb, int c, int flags)
{
    char utf8[6];
    const char *stop;
    const char *p;

    if (no_escape(c, flags)) {
        add_charbuf(cb, c);
        return TRUE;
    }

    stop = utf8_put_char(utf8, c);
    for (p = utf8; p < stop; p++) {
        int octet = p[0]&0xff;         /* mask: plain char may be signed */

        add_charbuf(cb, '%');
        add_charbuf(cb, hexdigit(octet>>4));
        add_charbuf(cb, hexdigit(octet&0xf));
    }

    return TRUE;
}
/*
 * Append exactly `len` codes starting at `s` to `cb`.
 *
 * If the codes fit between cb->here and cb->end they are block-copied;
 * otherwise they are appended one at a time through add_charbuf().
 *
 * The block copy uses wmemcpy() and not wcsncpy(): wcsncpy() stops at the
 * first 0 code and zero-fills the remainder, which would corrupt text that
 * contains embedded 0 codes and make the fast path behave differently from
 * the one-by-one path.
 *
 * Always returns TRUE.
 */
static int
add_nchars_charbuf(charbuf *cb, size_t len, const pl_wchar_t *s)
{
    if (len == 0)
        return TRUE;

    /* Compare lengths instead of forming cb->here+len, which may point
     * outside the buffer when len is large. */
    if (len <= (size_t)(cb->end - cb->here)) {
        wmemcpy(cb->here, s, len);
        cb->here += len;
    } else {
        size_t n;

        for (n = 0; n < len; n++)
            add_charbuf(cb, s[n]);
    }

    return TRUE;
}
/*
 * Append the components in `ranges` to `cb`, copying each one with
 * add_verb_range_charbuf():
 *
 *     [scheme ':'] ['//' authority] path ['?' query] ['#' fragment]
 *
 * A component other than the path is emitted only if its `start` pointer is
 * set; the path is always emitted.
 *
 * Always returns TRUE.
 */
static int
ranges_in_charbuf(charbuf *cb, uri_component_ranges *ranges)
{
    if (ranges->scheme.start) {
        add_verb_range_charbuf(cb, &ranges->scheme);
        add_charbuf(cb, ':');
    }

    if (ranges->authority.start) {
        add_charbuf(cb, '/');
        add_charbuf(cb, '/');
        add_verb_range_charbuf(cb, &ranges->authority);
    }

    add_verb_range_charbuf(cb, &ranges->path);

    if (ranges->query.start) {
        add_charbuf(cb, '?');
        add_verb_range_charbuf(cb, &ranges->query);
    }

    if (ranges->fragment.start) {
        add_charbuf(cb, '#');
        add_verb_range_charbuf(cb, &ranges->fragment);
    }

    return TRUE;
}
/*
 * Append a normalized rendering of the components in `ranges` to `cb`.
 *
 *  - scheme and authority go through add_lwr_range_charbuf() with
 *    ESC_SCHEME and ESC_AUTH respectively;
 *  - a non-empty path is first rendered with add_range_charbuf() (ESC_PATH)
 *    into a scratch buffer, then passed through removed_dot_segments() into
 *    a second scratch buffer whose result is appended;
 *  - query and fragment go through add_range_charbuf() with ESC_QUERY and
 *    ESC_QVALUE respectively.
 *
 * `iri` is passed on unchanged to the range helpers.  Always returns TRUE.
 */
static int
normalize_in_charbuf(charbuf *cb, uri_component_ranges *ranges, int iri)
{
    fill_flags();

    if (ranges->scheme.start) {
        add_lwr_range_charbuf(cb, &ranges->scheme, iri, ESC_SCHEME);
        add_charbuf(cb, ':');
    }

    if (ranges->authority.start) {
        add_charbuf(cb, '/');
        add_charbuf(cb, '/');
        add_lwr_range_charbuf(cb, &ranges->authority, iri, ESC_AUTH);
    }

    if (ranges->path.end > ranges->path.start) {
        charbuf encoded;               /* path after add_range_charbuf() */
        charbuf cleaned;               /* path after dot-segment removal */
        size_t cleaned_len;

        init_charbuf(&encoded);
        add_range_charbuf(&encoded, &ranges->path, iri, ESC_PATH);

        /* The second buffer is sized to the encoded path length. */
        init_charbuf_at_size(&cleaned, encoded.here-encoded.base);
        cleaned_len = removed_dot_segments(encoded.here-encoded.base,
                                           encoded.base,
                                           cleaned.base);
        add_nchars_charbuf(cb, cleaned_len, cleaned.base);

        free_charbuf(&cleaned);
        free_charbuf(&encoded);
    }

    if (ranges->query.start) {
        add_charbuf(cb, '?');
        add_range_charbuf(cb, &ranges->query, iri, ESC_QUERY);
    }

    if (ranges->fragment.start) {
        add_charbuf(cb, '#');
        add_range_charbuf(cb, &ranges->fragment, iri, ESC_QVALUE);
    }

    return TRUE;
}
/*
 * turtle_read_string(C0, Stream, C, Value)
 *
 * C0 must be the code of '"'.  Reads either a short string ("...") or a
 * long string ("""...""") from Stream.  A backslash hands the following
 * code to string_escape().  On success C is unified with the code following
 * the closing quote(s) and Value with an atom holding the string content
 * (ATOM_ for the empty short string).
 *
 * Raises a type error if C0 is not an integer and a syntax error
 * ("eof_in_string") if -1 is read before the string is closed.  Fails if C0
 * is not '"', if no stream handle can be obtained, or if string_escape()
 * fails.
 */
static foreign_t
turtle_read_string(term_t C0, term_t Stream, term_t C, term_t Value)
{
    int c;
    int rc;
    int endlen = 1;                    /* number of '"' closing the string */
    charbuf b;
    IOSTREAM *in;

    if (!PL_get_integer(C0, &c))
        return type_error(C0, "code");
    if (c != '"')
        return FALSE;
    if (!PL_get_stream_handle(Stream, &in))
        return FALSE;

    init_charbuf(&b);

    c = Sgetcode(in);
    if (c == '"') {
        c = Sgetcode(in);
        if (c != '"') {                /* "" : the empty string */
            rc = (PL_unify_integer(C, c) &&
                  PL_unify_atom(Value, ATOM_));
            goto out;
        }
        endlen = 3;                    /* """...""" */
        c = Sgetcode(in);
    }

    for (;;) {
        if (c == -1) {
            /* Build the error while the stream handle is still held; the
             * handle must not be used once it has been released. */
            rc = syntax_error("eof_in_string", in);
            goto out;
        }

        if (c == '"') {
            int count = 1;

            /* Count consecutive quotes, up to the closing length. */
            while (count < endlen && (c = Sgetcode(in)) == '"')
                count++;

            if (count == endlen) {
                c = Sgetcode(in);      /* look-ahead returned through C */
                rc = (PL_unify_integer(C, c) &&
                      PL_unify_wchars(Value, PL_ATOM,
                                      b.here - b.base, b.base));
                goto out;
            }

            /* Too few quotes to close a long string: they are content. */
            while (count-- > 0)
                add_charbuf(&b, '"');

            /* `c` now holds the non-quote code that ended the run.  Process
             * it through the normal dispatch so that a backslash or -1 at
             * this point is not stored verbatim. */
            continue;
        }

        if (c == '\\') {
            int esc;

            c = Sgetcode(in);
            if (!string_escape(in, c, &esc)) {
                rc = FALSE;
                goto out;
            }
            add_charbuf(&b, esc);
        } else {
            add_charbuf(&b, c);
        }

        c = Sgetcode(in);
    }

out:
    /* Single exit: every path drops the buffer and releases the stream. */
    free_charbuf(&b);
    PL_release_stream(in);
    return rc;
}
/*
 * uri_components(URI, components)
 *
 * Two directions:
 *
 *  - If URI can be read as text it is split with parse_uri() and
 *    `components` is unified with a FUNCTOR_uri_components5 term whose five
 *    arguments are filled by unify_range() from the scheme, authority,
 *    path, query and fragment ranges.
 *  - Otherwise, if `components` matches FUNCTOR_uri_components5, the text is
 *    assembled from its arguments and URI is unified with the resulting
 *    atom:
 *
 *        [arg1 ':'] ['//' arg2] [arg3] ['?' arg4] ['#' arg5]
 *
 *    The query part is only emitted when its text is non-empty.
 *  - If neither holds, PL_get_wchars() is called again with CVT_EXCEPTION so
 *    that it reports the error on URI.
 *
 * get_text_arg() results are used as follows: TRUE means text was obtained,
 * -1 aborts the predicate with failure, any other value skips the part.
 */
static foreign_t
uri_components(term_t URI, term_t components)
{
    pl_wchar_t *s;
    size_t len;
    charbuf b;
    int rc;

    if (PL_get_wchars(URI, &len, &s, CVT_ATOM|CVT_STRING|CVT_LIST)) {
        uri_component_ranges ranges;
        term_t rt = PL_new_term_refs(6);   /* rt: result, rt+1..rt+5: args */
        term_t av = rt+1;

        parse_uri(&ranges, len, s);

        unify_range(av+0, &ranges.scheme);
        unify_range(av+1, &ranges.authority);
        unify_range(av+2, &ranges.path);
        unify_range(av+3, &ranges.query);
        unify_range(av+4, &ranges.fragment);

        return (PL_cons_functor_v(rt, FUNCTOR_uri_components5, av) &&
                PL_unify(components, rt));
    }

    if (!PL_is_functor(components, FUNCTOR_uri_components5)) {
        /* generate an error */
        return PL_get_wchars(URI, &len, &s,
                             CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);
    }

    init_charbuf(&b);

    /* scheme */
    rc = get_text_arg(components, 1, &len, &s, TXT_EX_TEXT);
    if (rc == -1)
        goto error;
    if (rc == TRUE) {
        add_nchars_charbuf(&b, len, s);
        add_charbuf(&b, ':');
    }

    /* authority */
    rc = get_text_arg(components, 2, &len, &s, TXT_EX_TEXT);
    if (rc == -1)
        goto error;
    if (rc == TRUE) {
        add_charbuf(&b, '/');
        add_charbuf(&b, '/');
        add_nchars_charbuf(&b, len, s);
    }

    /* path */
    rc = get_text_arg(components, 3, &len, &s, TXT_EX_TEXT);
    if (rc == -1)
        goto error;
    if (rc == TRUE)
        add_nchars_charbuf(&b, len, s);

    /* query: skipped when the text is empty */
    rc = get_text_arg(components, 4, &len, &s, TXT_EX_TEXT);
    if (rc == -1)
        goto error;
    if (rc == TRUE && len > 0) {
        add_charbuf(&b, '?');
        add_nchars_charbuf(&b, len, s);
    }

    /* fragment */
    rc = get_text_arg(components, 5, &len, &s, TXT_EX_TEXT);
    if (rc == -1)
        goto error;
    if (rc == TRUE) {
        add_charbuf(&b, '#');
        add_nchars_charbuf(&b, len, s);
    }

    rc = PL_unify_wchars(URI, PL_ATOM, b.here - b.base, b.base);
    free_charbuf(&b);
    return rc;

error:
    free_charbuf(&b);
    return FALSE;
}
/*
 * resolve(Rel, Base, URI, normalize, iri)
 *
 * Resolve the reference Rel against Base and unify URI with the resulting
 * atom.
 *
 * Rel is split with parse_uri().  If it has a scheme it is used as is;
 * otherwise the target components are assembled from Rel and from the
 * ranges returned by base_ranges(Base):
 *
 *  - Rel has an authority: authority, path and query come from Rel;
 *  - Rel has an empty path: the base path is used, with Rel's query if
 *    present and the base query otherwise;
 *  - Rel's path starts with '/': it is used as is;
 *  - otherwise Rel's path is merged with the base path in the scratch
 *    buffer `pb`: "/" + Rel's path when the base has an authority and an
 *    empty path, else the base path with its last segment removed followed
 *    by Rel's path.
 *
 * The scheme always comes from the base and the fragment from Rel.
 *
 * If `normalize` is non-zero the result is produced by
 * normalize_in_charbuf() (which receives `iri`); otherwise dot segments are
 * removed from the path with removed_dot_segments() and the components are
 * emitted verbatim by ranges_in_charbuf().
 *
 * Fails if Rel cannot be read as text (PL_get_wchars() is called with
 * CVT_EXCEPTION) or if base_ranges() returns NULL.
 */
static foreign_t
resolve(term_t Rel, term_t Base, term_t URI, int normalize, int iri)
{
    pl_wchar_t *s;
    size_t slen;
    uri_component_ranges s_ranges, t_ranges;
    charbuf out, pb;
    int rc;

    if (!PL_get_wchars(Rel, &slen, &s,
                       CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION))
        return FALSE;

    init_charbuf(&pb);                 /* path-buffer */
    parse_uri(&s_ranges, slen, s);

    if (s_ranges.scheme.start) {
        t_ranges = s_ranges;
    } else {
        const uri_component_ranges *b_ranges;

        if (!(b_ranges = base_ranges(Base))) {
            free_charbuf(&pb);
            return FALSE;
        }

        memset(&t_ranges, 0, sizeof(t_ranges));

        if (s_ranges.authority.start) {
            t_ranges.authority = s_ranges.authority;
            t_ranges.path      = s_ranges.path;
            t_ranges.query     = s_ranges.query;
        } else {
            if (s_ranges.path.start == s_ranges.path.end) {
                t_ranges.path = b_ranges->path;
                if (s_ranges.query.start)
                    t_ranges.query = s_ranges.query;
                else
                    t_ranges.query = b_ranges->query;
            } else {
                if (s_ranges.path.start[0] == '/') {
                    t_ranges.path = s_ranges.path;
                } else {
                    if (b_ranges->authority.start &&
                        b_ranges->path.start == b_ranges->path.end) {
                        add_charbuf(&pb, '/');
                        add_verb_range_charbuf(&pb, &s_ranges.path);
                    } else {
                        range path = b_ranges->path;

                        path.end = remove_last_segment(path.start, path.end);
                        add_verb_range_charbuf(&pb, &path);
                        add_verb_range_charbuf(&pb, &s_ranges.path);
                    }
                    /* Both merge variants build the path in pb, so the
                     * target path must point at pb in both cases;
                     * otherwise the merged path is silently dropped. */
                    t_ranges.path.start = pb.base;
                    t_ranges.path.end   = pb.here;
                }
                t_ranges.query = s_ranges.query;
            }
            t_ranges.authority = b_ranges->authority;
        }
        t_ranges.scheme   = b_ranges->scheme;
        t_ranges.fragment = s_ranges.fragment;
    }

    init_charbuf(&out);                /* output buffer */

    if (normalize) {
        normalize_in_charbuf(&out, &t_ranges, iri);
    } else {
        charbuf path;
        size_t len;

        init_charbuf_at_size(&path, t_ranges.path.end - t_ranges.path.start);
        len = removed_dot_segments(t_ranges.path.end - t_ranges.path.start,
                                   t_ranges.path.start,
                                   path.base);
        t_ranges.path.start = path.base;
        t_ranges.path.end   = path.base+len;
        ranges_in_charbuf(&out, &t_ranges);
        free_charbuf(&path);           /* no longer referenced by t_ranges */
    }

    /* t_ranges.path may point into pb, so pb is only dropped once the
     * output has been produced; this covers both branches above. */
    free_charbuf(&pb);

    rc = PL_unify_wchars(URI, PL_ATOM, out.here-out.base, out.base);
    free_charbuf(&out);

    return rc;
}