// TODO: can we fail allocating memory? static VALUE fenix_file_expand_path(int argc, VALUE *argv) { size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0; size_t buffer_len = 0; char *fullpath = NULL; wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL; wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL; UINT cp; VALUE result = Qnil, path = Qnil, dir = Qnil; wchar_t wfullpath_buffer[PATH_BUFFER_SIZE]; wchar_t path_drive = L'\0', dir_drive = L'\0'; int ignore_dir = 0; rb_encoding *path_encoding; int tainted = 0; // prepare for rb_file_absolute_path() int abs_mode = 0; // retrieve path and dir from argv rb_scan_args(argc, argv, "11", &path, &dir); /* tainted if path is tainted */ tainted = OBJ_TAINTED(path); // get path encoding if (NIL_P(dir)) { path_encoding = rb_enc_get(path); } else { path_encoding = rb_enc_check(path, dir); } cp = fenix_code_page(path_encoding); // printf("code page: %i\n", cp); // coerce them to string path = fenix_coerce_to_path(path); // convert char * to wchar_t // path fenix_path_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp); // wprintf(L"wpath: '%s' with (%i) characters long.\n", wpath, wpath_len); /* determine if we need the user's home directory */ /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */ if (abs_mode == 0 && ((wpath_len == 1 && wpath_pos[0] == L'~') || (wpath_len >= 2 && wpath_pos[0] == L'~' && IS_DIR_SEPARATOR_P(wpath_pos[1])))) { /* tainted if expanding '~' */ tainted = 1; // wprintf(L"wpath requires expansion.\n"); whome = fenix_home_dir(); if (whome == NULL) { free(wpath); rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'"); } whome_len = wcslen(whome); if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) { free(wpath); rb_raise(rb_eArgError, "non-absolute home"); } // wprintf(L"whome: '%s' with (%i) characters long.\n", whome, whome_len); /* ignores dir since we are expading home */ ignore_dir = 1; /* exclude ~ from 
the result */ wpath_pos++; wpath_len--; /* exclude separator if present */ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { // wprintf(L"excluding expansion character and separator\n"); wpath_pos++; wpath_len--; } } else if (wpath_len >= 2 && wpath_pos[1] == L':') { if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) { /* ignore dir since path contains a drive letter and a root slash */ // wprintf(L"Ignore dir since we have drive letter and root slash\n"); ignore_dir = 1; } else { /* determine if we ignore dir or not later */ path_drive = wpath_pos[0]; } } else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') { wchar_t *wuser = wpath_pos + 1; wchar_t *pos = wuser; char *user; /* tainted if expanding '~' */ tainted = 1; while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0') pos++; *pos = '\0'; size = WideCharToMultiByte(cp, 0, wuser, -1, NULL, 0, NULL, NULL); user = (char *)malloc(size * sizeof(char)); WideCharToMultiByte(cp, 0, wuser, -1, user, size, NULL, NULL); /* convert to VALUE and set the path encoding */ result = rb_enc_str_new(user, size - 1, path_encoding); free(wpath); if (user) free(user); rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result)); } /* convert dir */ if (!ignore_dir && !NIL_P(dir)) { // coerce them to string dir = fenix_coerce_to_path(dir); // convert char * to wchar_t // dir fenix_path_to_wchar(dir, &wdir, NULL, &wdir_len, cp); // wprintf(L"wdir: '%s' with (%i) characters long.\n", wdir, wdir_len); if (wdir_len >= 2 && wdir[1] == L':') { dir_drive = wdir[0]; if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { wdir_len = 2; } } else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) { /* UNC path */ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { /* cut the UNC path tail to '//host/share' */ size_t separators = 0; size_t pos = 2; while (pos < wdir_len && separators < 2) { if (IS_DIR_SEPARATOR_P(wdir[pos])) { separators++; } pos++; } if (separators == 2) wdir_len = pos - 1; // wprintf(L"UNC wdir: '%s' with (%i) 
characters.\n", wdir, wdir_len); } } } /* determine if we ignore dir or not */ if (!ignore_dir && path_drive && dir_drive) { if (towupper(path_drive) == towupper(dir_drive)) { /* exclude path drive letter to use dir */ // wprintf(L"excluding path drive letter\n"); wpath_pos += 2; wpath_len -= 2; } else { /* ignore dir since path drive is different from dir drive */ ignore_dir = 1; wdir_len = 0; } } if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) { /* ignore dir since path has UNC root */ ignore_dir = 1; wdir_len = 0; } else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) && !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) { /* ignore dir since path has root slash and dir doesn't have drive or UNC root */ ignore_dir = 1; wdir_len = 0; } // wprintf(L"wpath_len: %i\n", wpath_len); // wprintf(L"wdir_len: %i\n", wdir_len); // wprintf(L"whome_len: %i\n", whome_len); buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1; // wprintf(L"buffer_len: %i\n", buffer_len + 1); buffer = buffer_pos = (wchar_t *)malloc((buffer_len + 1) * sizeof(wchar_t)); /* add home */ if (whome_len) { // wprintf(L"Copying whome...\n"); wcsncpy(buffer_pos, whome, whome_len); buffer_pos += whome_len; } /* Add separator if required */ if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { // wprintf(L"Adding separator after whome\n"); buffer_pos[0] = L'\\'; buffer_pos++; } if (wdir_len) { /* tainted if dir is used and dir is tainted */ if (!tainted && OBJ_TAINTED(dir)) tainted = 1; // wprintf(L"Copying wdir...\n"); wcsncpy(buffer_pos, wdir, wdir_len); buffer_pos += wdir_len; } /* add separator if required */ if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { // wprintf(L"Adding separator after wdir\n"); buffer_pos[0] = L'\\'; buffer_pos++; } /* now deal with path */ if (wpath_len) { // wprintf(L"Copying wpath...\n"); wcsncpy(buffer_pos, wpath_pos, wpath_len); buffer_pos += wpath_len; } /* GetFullPathNameW requires at least "." 
to determine current directory */ if (wpath_len == 0) { // wprintf(L"Adding '.' to buffer\n"); buffer_pos[0] = L'.'; buffer_pos++; } /* Ensure buffer is NULL terminated */ buffer_pos[0] = L'\0'; /* tainted if path is relative */ if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer))) { tainted = 1; } // wprintf(L"buffer: '%s'\n", buffer); // FIXME: Make this more robust // Determine require buffer size size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL); if (size) { if (size > PATH_BUFFER_SIZE) { // allocate enough memory to contain the response wfullpath = (wchar_t *)malloc(size * sizeof(wchar_t)); size = GetFullPathNameW(buffer, size, wfullpath, NULL); } else { wfullpath = wfullpath_buffer; } // wprintf(L"wfullpath: '%s'\n", wfullpath); /* Calculate the new size and leave the garbage out */ // size = wcslen(wfullpath); /* Remove any trailing slashes */ if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) && wfullpath[size - 2] != L':' && !(size == 2 && IS_DIR_UNC_P(wfullpath))) { // wprintf(L"Removing trailing slash\n"); size -= 1; wfullpath[size] = L'\0'; } // wprintf(L"wfullpath: '%s'\n", wfullpath); /* Remove any trailing dot */ if (wfullpath[size - 1] == L'.') { // wprintf(L"Removing trailing dot\n"); size -= 1; wfullpath[size] = L'\0'; } /* removes trailing invalid ':$DATA' */ size = fenix_remove_invalid_alternative_data(wfullpath, size); // sanitize backslashes with forwardslashes fenix_replace_wchar(wfullpath, L'\\', L'/'); // wprintf(L"wfullpath: '%s'\n", wfullpath); // What CodePage should we use? // cp = AreFileApisANSI() ? 
CP_ACP : CP_OEMCP; // convert to char * size = WideCharToMultiByte(cp, 0, wfullpath, -1, NULL, 0, NULL, NULL); fullpath = (char *)malloc(size * sizeof(char)); WideCharToMultiByte(cp, 0, wfullpath, -1, fullpath, size, NULL, NULL); /* convert to VALUE and set the path encoding */ result = rb_enc_str_new(fullpath, size - 1, path_encoding); /* makes the result object tainted if expanding tainted strings or returning modified path */ if (tainted) OBJ_TAINT(result); } // TODO: better cleanup if (buffer) free(buffer); if (wpath) free(wpath); if (wdir) free(wdir); if (whome) free(whome); if (wfullpath && wfullpath != wfullpath_buffer) free(wfullpath); if (fullpath) free(fullpath); return result; }
/*
 * Expand fname into an absolute, normalized path, optionally relative to
 * dname, and store it in result.
 *
 * fname     - path to expand (may be nil, in which case no path is converted)
 * dname     - base directory, or nil
 * abs_mode  - when non-zero, a leading "~" is NOT expanded
 * long_name - when non-zero, replace_to_long_name() is applied to the
 *             resolved path
 * result    - String that is truncated to length 0 and then filled with
 *             the expanded path
 *
 * Returns the String produced by append_wstr(), associated with the path
 * encoding.  The result is tainted when fname is tainted, when "~" was
 * expanded, when a tainted dir was used, or when the joined path is
 * relative.
 *
 * Raises ArgumentError when the home directory cannot be found or is not
 * absolute, for "~user" paths, and when GetFullPathNameW fails.
 *
 * NOTE(review): wpath and wdir are released with xfree() on some paths and
 * free() on others; the allocator used by mbstr_to_wstr() is not visible
 * here, so the calls are kept exactly as they were -- confirm and unify.
 */
VALUE
rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
{
    size_t size = 0, whome_len = 0;
    size_t buffer_len = 0;
    long wpath_len = 0, wdir_len = 0;
    wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
    wchar_t *wdir = NULL, *wdir_pos = NULL;
    wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
    UINT path_cp, cp;
    VALUE path = fname, dir = dname;
    wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
    wchar_t path_drive = L'\0', dir_drive = L'\0';
    int ignore_dir = 0;
    rb_encoding *path_encoding;
    int tainted = 0;

    /* tainted if path is tainted */
    tainted = OBJ_TAINTED(path);

    /* get path encoding */
    if (NIL_P(dir)) {
        path_encoding = rb_enc_get(path);
    }
    else {
        path_encoding = rb_enc_check(path, dir);
    }

    cp = path_cp = code_page(path_encoding);

    /* workaround invalid codepage */
    if (path_cp == INVALID_CODE_PAGE) {
        cp = CP_UTF8;
        if (!NIL_P(path)) {
            path = fix_string_encoding(path, path_encoding);
        }
    }

    /* convert char * to wchar_t */
    if (!NIL_P(path)) {
        wpath = mbstr_to_wstr(cp, RSTRING_PTR(path), (int)RSTRING_LEN(path), &wpath_len);
        wpath_pos = wpath;
    }

    /* determine if we need the user's home directory */
    /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
    if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
        (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
        /* tainted if expanding '~' */
        tainted = 1;

        whome = home_dir();
        if (whome == NULL) {
            xfree(wpath);
            rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
        }
        whome_len = wcslen(whome);

        if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
            xfree(wpath);
            xfree(whome);
            rb_raise(rb_eArgError, "non-absolute home");
        }

        if (path_cp == INVALID_CODE_PAGE || rb_enc_str_asciionly_p(path)) {
            /* use filesystem encoding if expanding home dir */
            path_encoding = rb_filesystem_encoding();
            cp = path_cp = system_code_page();
        }

        /* ignores dir since we are expanding home */
        ignore_dir = 1;

        /* exclude ~ from the result */
        wpath_pos++;
        wpath_len--;

        /* exclude separator if present */
        if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
            wpath_pos++;
            wpath_len--;
        }
    }
    else if (wpath_len >= 2 && wpath_pos[1] == L':') {
        if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
            /* ignore dir since path contains a drive letter and a root slash */
            ignore_dir = 1;
        }
        else {
            /* determine if we ignore dir or not later */
            path_drive = wpath_pos[0];
            /* strip the "X:" prefix; it is re-added when the buffer is built */
            wpath_pos += 2;
            wpath_len -= 2;
        }
    }
    else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
        /* "~user" is not supported: report the user name in the error */
        result = rb_str_new_cstr("can't find user ");
        result = append_wstr(result, wpath_pos + 1, user_length_in_path(wpath_pos + 1, wpath_len - 1),
                             cp, path_cp, path_encoding);

        if (wpath)
            xfree(wpath);

        rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
    }

    /* convert dir */
    if (!ignore_dir && !NIL_P(dir)) {
        /* fix string encoding */
        if (path_cp == INVALID_CODE_PAGE) {
            dir = fix_string_encoding(dir, path_encoding);
        }

        /* convert char * to wchar_t */
        if (!NIL_P(dir)) {
            wdir = mbstr_to_wstr(cp, RSTRING_PTR(dir), (int)RSTRING_LEN(dir), &wdir_len);
            wdir_pos = wdir;
        }

        if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
            (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
            /* tainted if expanding '~' */
            tainted = 1;

            whome = home_dir();
            if (whome == NULL) {
                free(wpath);
                free(wdir);
                rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
            }
            whome_len = wcslen(whome);

            if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
                free(wpath);
                free(wdir);
                xfree(whome);
                rb_raise(rb_eArgError, "non-absolute home");
            }

            /* exclude ~ from the result */
            wdir_pos++;
            wdir_len--;

            /* exclude separator if present */
            if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
                wdir_pos++;
                wdir_len--;
            }
        }
        else if (wdir_len >= 2 && wdir[1] == L':') {
            dir_drive = wdir[0];
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                /* rooted path: keep only the drive specifier of dir */
                wdir_len = 2;
            }
        }
        else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
            /* UNC path */
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                /* cut the UNC path tail to '//host/share' */
                long separators = 0;
                long pos = 2;
                while (pos < wdir_len && separators < 2) {
                    if (IS_DIR_SEPARATOR_P(wdir[pos])) {
                        separators++;
                    }
                    pos++;
                }
                if (separators == 2)
                    wdir_len = pos - 1;
            }
        }
        else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
            /* "~user" in dir is not supported either */
            result = rb_str_new_cstr("can't find user ");
            result = append_wstr(result, wdir_pos + 1, user_length_in_path(wdir_pos + 1, wdir_len - 1),
                                 cp, path_cp, path_encoding);
            if (wpath)
                free(wpath);

            if (wdir)
                free(wdir);

            rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
        }
    }

    /* determine if we ignore dir or not */
    if (!ignore_dir && path_drive && dir_drive) {
        if (towupper(path_drive) != towupper(dir_drive)) {
            /* ignore dir since path drive is different from dir drive */
            ignore_dir = 1;
            wdir_len = 0;
            dir_drive = 0;
        }
    }

    if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
        /* ignore dir since path has UNC root */
        ignore_dir = 1;
        wdir_len = 0;
    }
    else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
             !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
        /* ignore dir since path has root slash and dir doesn't have drive or UNC root */
        ignore_dir = 1;
        wdir_len = 0;
    }

    /* room for home, dir and path, each followed by a possible separator */
    buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

    buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));

    /* add home */
    if (whome_len) {
        wcsncpy(buffer_pos, whome, whome_len);
        buffer_pos += whome_len;
    }

    /* add separator if required */
    if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }
    else if (!dir_drive && path_drive) {
        /* no dir drive to rely on: put back the drive stripped from path */
        *buffer_pos++ = path_drive;
        *buffer_pos++ = L':';
    }

    if (wdir_len) {
        /* tainted if dir is used and dir is tainted */
        if (!tainted && OBJ_TAINTED(dir))
            tainted = 1;

        wcsncpy(buffer_pos, wdir_pos, wdir_len);
        buffer_pos += wdir_len;
    }

    /* add separator if required */
    if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }

    /* now deal with path */
    if (wpath_len) {
        wcsncpy(buffer_pos, wpath_pos, wpath_len);
        buffer_pos += wpath_len;
    }

    /* GetFullPathNameW requires at least "." to determine current directory */
    if (wpath_len == 0) {
        buffer_pos[0] = L'.';
        buffer_pos++;
    }

    /* ensure buffer is NUL terminated */
    buffer_pos[0] = L'\0';

    /* tainted if path is relative */
    if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
        tainted = 1;

    /* FIXME: Make this more robust */
    /* first try the stack buffer; the return value tells the size needed */
    size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
    if (size > PATH_BUFFER_SIZE) {
        /* allocate more memory than allotted originally by PATH_BUFFER_SIZE */
        wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
        size = GetFullPathNameW(buffer, size, wfullpath, NULL);
    }
    else {
        wfullpath = wfullpath_buffer;
    }

    /*
     * GetFullPathNameW returns 0 on failure.  Indexing wfullpath[size - 1]
     * below would then read outside the buffer, so release everything and
     * report the failure instead.
     */
    if (size == 0) {
        if (wfullpath != wfullpath_buffer)
            xfree(wfullpath);

        if (buffer)
            xfree(buffer);

        if (wpath)
            free(wpath);

        if (wdir)
            free(wdir);

        if (whome)
            xfree(whome);

        rb_raise(rb_eArgError, "can't expand path");
    }

    /* remove any trailing slash */
    if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
        wfullpath[size - 2] != L':' &&
        !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* remove any trailing dot */
    if (wfullpath[size - 1] == L'.') {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* removes trailing invalid ':$DATA' */
    size = remove_invalid_alternative_data(wfullpath, size);

    /* replace the trailing path to long name */
    if (long_name)
        size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));

    /* sanitize backslashes with forwardslashes */
    replace_wchar(wfullpath, L'\\', L'/');

    /* convert to VALUE and set the path encoding */
    rb_str_set_len(result, 0);
    result = append_wstr(result, wfullpath, size, cp, path_cp, path_encoding);

    /* makes the result object tainted if expanding tainted strings or returning modified path */
    if (tainted)
        OBJ_TAINT(result);

    /* TODO: better cleanup */
    if (buffer)
        xfree(buffer);

    if (wpath)
        free(wpath);

    if (wdir)
        free(wdir);

    if (whome)
        xfree(whome);

    /* wfullpath may point at the stack buffer, which must not be freed */
    if (wfullpath != wfullpath_buffer)
        xfree(wfullpath);

    rb_enc_associate(result, path_encoding);
    return result;
}