/*
  Removes trailing '/' characters from PATH by shrinking its recorded size
  in place and returns PATH itself (no copy is made).

  At least one character is always kept, so a path made only of slashes
  (e.g. "//") collapses to "/" instead of an empty string. Empty and single
  character paths are returned untouched.
 */
static SgObject strip_trailing_slash(SgObject path)
{
  size_t s = SG_STRING_SIZE(path);
  /* nothing to strip; this also keeps s-1 below from wrapping around
     when the string is empty */
  if (s <= 1) return path;
  if (SG_STRING_VALUE_AT(path, s-1) == '/') {
    s--;
    /* stop at the first character so that s-1 never underflows */
    while (s > 1 && SG_STRING_VALUE_AT(path, s-1) == '/') s--;
    SG_STRING_SIZE(path) = s;
  }
  return path;
}
/* TODO maybe try not to use string port? */
/*
  Normalises FULLPATH by resolving "." and ".." path components and
  stripping trailing slashes. The result is accumulated in a string output
  port and returned as a string.

  A '.' is only interpreted when it starts a path component (i.e. it is the
  first character or follows a '/'), so names such as "foo." or "foo.." are
  copied verbatim.
 */
static SgObject normalise_path(SgObject fullpath)
{
  SgStringPort sp;
  SgObject out = Sg_InitStringOutputPort(&sp, SG_STRING_SIZE(fullpath));
  int64_t pos = 0;		/* write position kept in sync with OUT */
  int i = 0;
  while (i < SG_STRING_SIZE(fullpath)) {
    SgChar c = SG_STRING_VALUE_AT(fullpath, i++);
    /* i now indexes the character following c, so c itself is at i-1 */
    int component_start =
      (i == 1 || SG_STRING_VALUE_AT(fullpath, i-2) == '/');
    SgChar c2;

    if (c != '.' || !component_start) {
      /* ordinary character, including dots inside a file name */
      Sg_PutcUnsafe(out, c);
      pos++;
      continue;
    }
    /* a trailing "." component is simply dropped */
    if (i == SG_STRING_SIZE(fullpath)) continue;

    c2 = SG_STRING_VALUE_AT(fullpath, i++);
    if (c2 == '.') {
      /* check the size first so that index i is never read when it is
	 already at the end of the string */
      if (SG_STRING_SIZE(fullpath) == i ||
	  SG_STRING_VALUE_AT(fullpath, i) == '/') {
	/* ".." component: rewind the output to the parent directory */
	if (pos-2 > 0) {
	  SgObject tmp = Sg_GetStringFromStringPort(out);
	  /* skip previous '.' and '/' */
	  pos = search_separator(tmp, pos-2);
	} else {
	  pos = 1; /* root */
	}
	if (pos <= 0) pos = 1; /* root */
	Sg_SetPortPosition(out, pos, SG_BEGIN);
	/* at the root the '/' following ".." is skipped as well */
	if (pos == 1) i++;
      } else {
	/* ok just a file named '..?' or longer*/
	Sg_PutcUnsafe(out, '.');
	Sg_PutcUnsafe(out, '.');
	pos += 2;
	/* copy the rest of a run of dots, e.g. "..." */
	for (; i < SG_STRING_SIZE(fullpath); i++) {
	  SgChar c3 = SG_STRING_VALUE_AT(fullpath, i);
	  if (c3 != '.') break;
	  Sg_PutcUnsafe(out, c3);
	  pos++;
	}
      }
    } else if (c2 != '/') {
      /* hidden file or the like, e.g. ".foo" */
      Sg_PutcUnsafe(out, '.');
      Sg_PutcUnsafe(out, c2);
      pos += 2;
    }
    /* otherwise it was "./", both characters are skipped */
  }
  return strip_trailing_slash(Sg_GetStringFromStringPort(out));
}
/*
  Matches PATH_ELEMENT (one path component) against PAT, a list of glob
  rules for a single component.

  pat          - list whose elements are strings, charsets or regular
                 expression patterns
  path_element - string to be tested
  flags        - only SG_DOTMATCH is consulted in the code visible here

  Returns FALSE as soon as one rule fails. When a regular expression rule
  is reached, the result of matching it against the rest of PATH_ELEMENT is
  returned directly.

  NOTE(review): the tail of this function (the value returned once every
  rule has been consumed, and the closing brace) is not visible in this
  chunk.
 */
static int glob_match1(SgObject pat, SgObject path_element, int flags)
{
  /* unless SG_DOTMATCH is given, a leading '.' needs an explicit match */
  const int period = !(flags & SG_DOTMATCH);
  /* Flags are taken from Ruby but I don't know what FNM_PATHNAME does
     on glob. so ignore.*/
  /* const int pathname = flags & SG_PATHNAME; */
  int pos = 0;			/* current index into path_element */
  SgObject cp;
  if (period) {
    /* reject a dot file unless the first rule is a string starting
       with '.' */
    if (SG_STRING_VALUE_AT(path_element, 0) == '.' && /* leading period */
	!(SG_STRINGP(SG_CAR(pat)) &&
	  SG_STRING_VALUE_AT(SG_STRING(SG_CAR(pat)), 0) == '.')) {
      return FALSE;
    }
  }
  SG_FOR_EACH(cp, pat) {
    /* the matching is pretty much simple, a rule may contain the followings:
       - string
       - charset
       - pattern (regular expression)
       these are resolved by prefix match, one char match or regex match,
       respectively.
     */
    SgObject p = SG_CAR(cp);
    /* rules remain but the path element is already exhausted */
    if (pos >= SG_STRING_SIZE(path_element)) return FALSE;
    if (SG_STRINGP(p)) {
      int i;
      /* NOTE(review): pos is only range checked once per rule, so a string
	 rule longer than the remaining input indexes past
	 SG_STRING_SIZE(path_element) -- confirm this is safe */
      for (i = 0; i < SG_STRING_SIZE(p); i++) {
	if (!SG_EQ(SG_STRING_VALUE_AT(p, i),
		   SG_STRING_VALUE_AT(path_element, pos++))) {
	  return FALSE;
	}
      }
    } else if (SG_CHAR_SET_P(p)) {
      /* a charset consumes exactly one character */
      if (!Sg_CharSetContains(p, SG_STRING_VALUE_AT(path_element, pos++))) {
	return FALSE;
      }
    } else if (SG_PATTERNP(p)) {
      /* the regex decides for everything from pos to the end; any rule
	 after it is never looked at */
      SgMatcher *m = Sg_RegexTextMatcher(SG_PATTERN(p), path_element, pos,
					 SG_STRING_SIZE(path_element));
      return Sg_RegexTextMatches(SG_TEXT_MATCHER(m));
    } else {
      Sg_Error(UC("[Internal] Unknown glob rule '%S' in '%S'"), p, pat);
      return FALSE;		/* dummy */
    }
  }
  /* NOTE(review): the remainder of glob_match1 is missing from this view */
/* from mosh */
/*
  Transcodes PATH to UTF-16LE through a byte array output port, appends a
  terminating NUL and returns the resulting bytes viewed as a wide
  character string.
 */
static const wchar_t* utf32ToUtf16(SgString *path)
{
  int length = SG_STRING_SIZE(path);
  SgCodec *utf16 = Sg_MakeUtf16Codec(UTF_16LE);
  SgTranscoder *transcoder = Sg_MakeTranscoder(utf16, LF, SG_REPLACE_ERROR);
  SgPort *bytes, *text;
  SgBytePort byte_port;
  SgTranscodedPort text_port;

  /* initial buffer: room for every character plus the terminator */
  bytes = Sg_InitByteArrayOutputPort(&byte_port,
				     sizeof(wchar_t) * (length + 1));
  text = Sg_InitTranscodedPort(&text_port, bytes, transcoder,
			       SG_OUTPUT_PORT);

  Sg_TranscoderWrite(transcoder, text,
		     SG_STRING_VALUE(path), SG_STRING_SIZE(path));
  Sg_TranscoderPutc(transcoder, text, '\0');

  return (const wchar_t*)Sg_GetByteArrayFromBinaryPort(&byte_port);
}
/*
  Scans PATH forward from index SKIPPED and returns the index of the first
  directory separator found, or the size of PATH when there is none.
 */
static int next_dirsep(SgObject path, int skipped)
{
  int pos;
  for (pos = skipped; pos < SG_STRING_SIZE(path); pos++) {
    if (dirsep_p(S(path, pos))) break;
  }
  return pos;
}
/*
  Joins PATH and FILE into a newly reserved string.

  A single '/' is inserted between the two unless PATH already ends with
  '/'. An empty PATH is handled like any other path that does not end with
  '/', i.e. the separator is still inserted.
 */
SgObject Sg_BuildPath(SgString *path, SgString *file)
{
  int psize = SG_STRING_SIZE(path), fsize = SG_STRING_SIZE(file);
  int i, j, offset = 1;		/* offset: 1 if a '/' must be inserted */
  SgObject ret;

  /* psize is checked first so that an empty PATH never reads index -1 */
  if (psize > 0 && SG_STRING_VALUE_AT(path, psize-1) == '/') offset--;

  ret = Sg_ReserveString(psize + fsize + offset, 0);
  for (i = 0; i < psize; i++) {
    SG_STRING_VALUE_AT(ret, i) = SG_STRING_VALUE_AT(path, i);
  }
  if (offset) {
    SG_STRING_VALUE_AT(ret, i++) = '/';
  }
  /* i keeps running so FILE lands right after PATH (and the '/') */
  for (j = 0; j < fsize; i++, j++) {
    SG_STRING_VALUE_AT(ret, i) = SG_STRING_VALUE_AT(file, j);
  }
  return ret;
}
/*
  Returns the part of PATH that precedes its last '/'.

  SG_FALSE is returned when PATH contains no '/' at all or when the only
  '/' is the very first character.
 */
SgObject Sg_DirectoryName(SgString *path)
{
  int last = SG_STRING_SIZE(path);

  /* walk backwards until a '/' is hit or the string is exhausted (-1) */
  while (--last >= 0 && SG_STRING_VALUE_AT(path, last) != '/') {
    /* nothing to do */
  }
  if (last > 0) return Sg_Substring(path, 0, last);
  return SG_FALSE;
}
/*
  Returns PATH with every backslash character removed.

  When PATH has no backslash the very same object is returned, otherwise a
  newly reserved string holding the remaining characters.
 */
static SgObject remove_backslashes(SgObject path)
{
  int src, dst = 0, kept = 0;
  SgObject stripped;

  /* first pass: how many characters survive */
  for (src = 0; src < SG_STRING_SIZE(path); src++) {
    if (SG_STRING_VALUE_AT(path, src) == '\\') continue;
    kept++;
  }
  /* no backslash */
  if (kept == SG_STRING_SIZE(path)) return path;

  /* second pass: copy the survivors */
  stripped = Sg_ReserveString(kept, '\0');
  for (src = 0; src < SG_STRING_SIZE(path); src++) {
    SgChar ch = SG_STRING_VALUE_AT(path, src);
    if (ch == '\\') continue;
    SG_STRING_VALUE_AT(stripped, dst++) = ch;
  }
  return stripped;
}
/* TODO Should we skip? */
/*
  Returns the number of leading characters of PATH that form a prefix:

  - when PATH starts with two directory separators, everything up to the
    separator that ends the second name (or up to the end of PATH)
  - 2 when has_drive_letter() accepts PATH
  - 0 otherwise

  Every index is checked against the size of PATH before it is read, the
  same way next_dirsep() does.
 */
static int detect_prefix(SgObject path)
{
  /* network address or so e.g. \\foo\bar */
  if (SG_STRING_SIZE(path) >= 2 &&
      dirsep_p(S(path,0)) && dirsep_p(S(path,1))) {
    int skipped = 2;
    /* swallow any further leading separators */
    while (skipped < SG_STRING_SIZE(path) && dirsep_p(S(path, skipped))) {
      skipped++;
    }
    /* skip the first name; if a second name follows right after its
       separator, skip that one too */
    if ((skipped = next_dirsep(path, skipped)) < SG_STRING_SIZE(path) &&
	skipped+1 < SG_STRING_SIZE(path) &&
	!dirsep_p(S(path, skipped+1))) {
      skipped = next_dirsep(path, skipped+1);
    }
    return skipped;
  }
  if (has_drive_letter(path)) {
    return 2;
  }
  return 0;
}
/*
  Pattern conversion (see glob_make_pattern):
  converts given path template to pattern
  e.g.)
   - "foo/bar/\*"       -> (("foo") ("bar") (ANY))
   - "foo/bar/buz*"     -> (("foo") ("bar") ("buz" ANY))
   - "foo/bar/[b][u]z*" -> (("foo") ("bar") ([b] [u] "z" ANY))
  each element of the list represents a matching rule of path element.
 */
/*
  Looks for the first ']' in PATH at or after index START and returns its
  index. Unless SG_NOESCAPE is set in FLAGS, the character following a
  backslash is not examined. START itself is returned when no ']' is found.
 */
static int find_close_bracket(SgString *path, int start, int flags)
{
  int pos = start;
  while (pos < SG_STRING_SIZE(path)) {
    SgChar ch = SG_STRING_VALUE_AT(path, pos);
    if (ch == ']') return pos;
    /* an escaping backslash hides the next character */
    if (ch == '\\' && !(flags & SG_NOESCAPE)) pos++;
    pos++;
  }
  return start;
}
/*
  Compiles the glob template PATH into a list of per path element rule
  lists, e.g. "foo/b[a]r*" gives one list for "foo" and one for "b[a]r*".

  Within an element:
   - literal text becomes a string (backslashes are removed unless
     SG_NOESCAPE is set in FLAGS)
   - "[...]" becomes a charset; a '[' without a closing ']' is literal text
   - '?' becomes FULL_CHARSET
   - a run of '*' becomes a single STAR
   - a leading double star followed by '/' (repeated any number of times)
     becomes an element holding STAR_SLASH

  The list ends with (ANY) when the last element is not empty and with
  (DIR) otherwise, e.g. when PATH ends with '/'.
 */
static SgObject glob_make_pattern(SgString *path, int flags)
{
  const int escape = !(flags & SG_NOESCAPE);
  /* h/t: resulting list, h1/t1: rules of the current path element */
  SgObject h = SG_NIL, t = SG_NIL, h1 = SG_NIL, t1 = SG_NIL;
  int i, start;
  /* appends the pending literal text [start, i) to the current element and
     moves start just behind the character at i */
#define emit()							\
  do {								\
    if (start != i) {						\
      SgObject tmp = Sg_Substring(path, start, i);		\
      if (escape) tmp = remove_backslashes(tmp);		\
      SG_APPEND1(h1, t1, tmp);					\
    }								\
    start = i+1;						\
  } while (0)

  for (i = 0, start = 0; i < SG_STRING_SIZE(path);) {
    SgChar c = SG_STRING_VALUE_AT(path, i);
    switch (c) {
    case '[': {
      int s = i, e;
      /* search from the '[' itself, not from the pending literal text,
	 otherwise a ']' in front of it would be taken as the closer */
      e = find_close_bracket(path, s, flags);
      if (s != e) {
	emit();
	i = e + 1;
	SG_APPEND1(h1, t1, Sg_ParseCharSetString(path, FALSE, s, i));
	/* continue right behind ']' so that the next character is still
	   dispatched by this switch */
	start = i;
      } else {
	/* no closing bracket, '[' stays in the literal text */
	i++;
      }
    }
      break;
    case '/':
      /* next */
      emit();
      /* if the path starts with '/', then this can be null */
      if (!SG_NULLP(h1)) {
	SG_APPEND1(h, t, convert_star(h1));
      }
      h1 = t1 = SG_NIL;	/* reset it */
      /* this need to be updated */
      start = ++i;
      break;
    case '*': {
      int has = (start != i);
      emit();
      /* merge it if it's there */
      if (!has && SG_STRING_SIZE(path) - i >= 3 &&
	  SG_STRING_VALUE_AT(path, i+1) == '*' &&
	  SG_STRING_VALUE_AT(path, i+2) == '/') {
	do {
	  i += 3;
	  /* skip '/' */
	  while (i < SG_STRING_SIZE(path) &&
		 SG_STRING_VALUE_AT(path, i) == '/') i++;
	} while (SG_STRING_SIZE(path) - i >= 3 &&
		 SG_STRING_VALUE_AT(path, i) == '*' &&
		 SG_STRING_VALUE_AT(path, i+1) == '*' &&
		 SG_STRING_VALUE_AT(path, i+2) == '/');
	SG_APPEND1(h1, t1, STAR_SLASH);
	SG_APPEND1(h, t, h1);
	h1 = t1 = SG_NIL;	/* reset it */
	start = i;
      } else {
	SG_APPEND1(h1, t1, STAR);
	while (i < SG_STRING_SIZE(path) &&
	       SG_STRING_VALUE_AT(path, i) == '*') i++;
	/* the whole run is consumed, none of it is literal text */
	start = i;
      }
      break;
    }
    case '?':
      emit();
      SG_APPEND1(h1, t1, FULL_CHARSET);
      i++;
      break;
    default:
      /* literal character, collected by a later emit() */
      i++;
      break;
    }
  }

  emit();
  if (!SG_NULLP(h1)) {
    SG_APPEND1(h, t, convert_star(h1));
    SG_APPEND1(h, t, SG_LIST1(ANY));
  } else {
    SG_APPEND1(h, t, SG_LIST1(DIR));
  }
#undef emit
  return h;
}
/*
  Expands the first top level "{a,b,...}" group of STR and returns the list
  of resulting strings, e.g. "foo/{a,b}" gives ("foo/a" "foo/b"). Each
  result is expanded recursively, so further groups are handled as well.

  When STR has no complete group, a list holding STR itself is returned.
  Unless SG_NOESCAPE is set in FLAGS, a backslash makes the scanner ignore
  the character that follows it.
 */
static SgObject brace_expand(SgString *str, int flags)
{
  const int escape = !(flags & SG_NOESCAPE);
  int lbrace = 0, rbrace = 0, depth = 0, scan;
  int haslb = FALSE, hasrb = FALSE;
  SgObject h = SG_NIL, t = SG_NIL;
  SgPort *out;
  SgStringPort tp;
  int pos;

  /* locate the outermost '{' and the '}' that closes it */
  for (scan = 0; scan < SG_STRING_SIZE(str); scan++) {
    SgChar c = SG_STRING_VALUE_AT(str, scan);
    if (c == '{') {
      if (depth++ == 0) {
	lbrace = scan;
	haslb = TRUE;
      }
    } else if (c == '}') {
      if (--depth == 0) {
	rbrace = scan;
	hasrb = TRUE;
	break;
      }
    } else if (c == '\\' && escape) {
      /* the escaped character is not inspected */
      if (++scan == SG_STRING_SIZE(str)) break;
    }
  }

  /* nothing to expand */
  if (!(haslb && hasrb)) return SG_LIST1(str);

  /* the text in front of '{' is shared by all alternatives */
  out = Sg_InitStringOutputPort(&tp, 255);
  for (pos = 0; pos < lbrace; pos++) {
    Sg_PutcUnsafe(out, SG_STRING_VALUE_AT(str, pos));
  }
  /* step over '{' */
  pos++;
  while (pos < rbrace) {
    int inner = 0, rest;
    SgObject tmp;

    /* one alternative: up to the next ',' that is not inside a nested
       group, or up to the closing brace */
    for (;;) {
      SgChar c = SG_STRING_VALUE_AT(str, pos);
      if (c == ',' && inner == 0) break;
      if (pos >= rbrace) break;
      if (c == '{') {
	inner++;
      } else if (c == '}') {
	inner--;
      } else if (c == '\\' && escape) {
	/* drop the backslash, keep the character behind it */
	if (++pos == rbrace) break;
	c = SG_STRING_VALUE_AT(str, pos);
      }
      Sg_PutcUnsafe(out, c);
      pos++;
    }
    /* step over ',' */
    pos++;

    /* the text behind '}' is shared as well */
    for (rest = rbrace+1; rest < SG_STRING_SIZE(str); rest++) {
      Sg_PutcUnsafe(out, SG_STRING_VALUE_AT(str, rest));
    }
    tmp = Sg_GetStringFromStringPort(&tp);
    SG_APPEND(h, t, brace_expand(tmp, flags));
    /* rewind to just behind the shared head for the next alternative */
    Sg_SetPortPosition(out, lbrace, SG_BEGIN);
  }
  SG_CLEAN_STRING_PORT(&tp);
  return h;
}