Exemple #1
0
/*
 * Expand a path into an absolute Windows path.
 *
 * argv carries one required argument (path) and one optional argument (dir),
 * as parsed by rb_scan_args() with the "11" spec.
 *
 * Behavior, in order:
 *   - A leading "~" (alone or followed by a directory separator) is replaced
 *     by the directory returned by fenix_home_dir(); dir is then ignored.
 *     Raises ArgumentError when no home is found or the home is relative.
 *   - A leading "~name" raises ArgumentError ("can't find user name").
 *   - dir is ignored when path has a drive letter plus root slash, when path
 *     and dir carry different drive letters, when path has a UNC root, or
 *     when path is rooted and dir has neither a drive letter nor a UNC root.
 *   - The combined string is resolved with GetFullPathNameW(); a trailing
 *     separator, a trailing dot and an invalid ':$DATA' suffix are removed
 *     and backslashes are turned into forward slashes.
 *
 * Returns a new String in the encoding of path (checked against dir when dir
 * is given), or Qnil when the first GetFullPathNameW() call returns 0.
 * The result is tainted when path is tainted, when "~" was expanded, when a
 * tainted dir was used, or when the assembled path is relative.
 *
 * TODO: can we fail allocating memory? None of the malloc() results below
 * are checked.
 */
static VALUE
fenix_file_expand_path(int argc, VALUE *argv)
{
	size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
	size_t buffer_len = 0;
	char *fullpath = NULL;
	wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL;
	wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
	UINT cp;
	VALUE result = Qnil, path = Qnil, dir = Qnil;
	wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
	wchar_t path_drive = L'\0', dir_drive = L'\0';
	int ignore_dir = 0;
	rb_encoding *path_encoding;
	int tainted = 0;
	/* prepare for rb_file_absolute_path(); always 0 here, so '~' is expanded */
	int abs_mode = 0;

	/* retrieve path and dir from argv */
	rb_scan_args(argc, argv, "11", &path, &dir);

	/* tainted if path is tainted */
	tainted = OBJ_TAINTED(path);

	/* get path encoding (and make sure dir is compatible when given) */
	if (NIL_P(dir)) {
		path_encoding = rb_enc_get(path);
	} else {
		path_encoding = rb_enc_check(path, dir);
	}
	cp = fenix_code_page(path_encoding);

	/* coerce path to string */
	path = fenix_coerce_to_path(path);

	/* convert path from char * to wchar_t; wpath_pos is the read cursor */
	fenix_path_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);

	/* determine if we need the user's home directory */
	/* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
	if (abs_mode == 0 && ((wpath_len == 1 && wpath_pos[0] == L'~') ||
		(wpath_len >= 2 && wpath_pos[0] == L'~' && IS_DIR_SEPARATOR_P(wpath_pos[1])))) {
		/* tainted if expanding '~' */
		tainted = 1;

		whome = fenix_home_dir();
		if (whome == NULL) {
			free(wpath);
			rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
		}
		whome_len = wcslen(whome);

		if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
			/* rb_raise() does not return: release everything we own first */
			free(wpath);
			free(whome);
			rb_raise(rb_eArgError, "non-absolute home");
		}

		/* ignores dir since we are expanding home */
		ignore_dir = 1;

		/* exclude ~ from the result */
		wpath_pos++;
		wpath_len--;

		/* exclude separator if present */
		if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
			wpath_pos++;
			wpath_len--;
		}
	} else if (wpath_len >= 2 && wpath_pos[1] == L':') {
		if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
			/* ignore dir since path contains a drive letter and a root slash */
			ignore_dir = 1;
		} else {
			/* determine if we ignore dir or not later */
			path_drive = wpath_pos[0];
		}
	} else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
		/* "~user" form: not supported, report the user name in the error */
		wchar_t *wuser = wpath_pos + 1;
		wchar_t *pos = wuser;
		char *user;

		/* tainted if expanding '~' */
		tainted = 1;

		/* cut the user name at the first separator (or end of string) */
		while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0')
			pos++;

		*pos = '\0';
		size = WideCharToMultiByte(cp, 0, wuser, -1, NULL, 0, NULL, NULL);
		user = (char *)malloc(size * sizeof(char));
		WideCharToMultiByte(cp, 0, wuser, -1, user, size, NULL, NULL);

		/* convert to VALUE and set the path encoding (size counts the NUL) */
		result = rb_enc_str_new(user, size - 1, path_encoding);

		free(wpath);
		free(user);

		rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
	}

	/* convert dir */
	if (!ignore_dir && !NIL_P(dir)) {
		/* coerce dir to string */
		dir = fenix_coerce_to_path(dir);

		/* convert dir from char * to wchar_t */
		fenix_path_to_wchar(dir, &wdir, NULL, &wdir_len, cp);

		if (wdir_len >= 2 && wdir[1] == L':') {
			dir_drive = wdir[0];
			if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
				/* rooted path: keep only the "X:" part of dir */
				wdir_len = 2;
			}
		} else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
			/* UNC path */
			if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
				/* cut the UNC path tail to '//host/share' */
				size_t separators = 0;
				size_t pos = 2;
				while (pos < wdir_len && separators < 2) {
					if (IS_DIR_SEPARATOR_P(wdir[pos])) {
						separators++;
					}
					pos++;
				}
				if (separators == 2)
					wdir_len = pos - 1;
			}
		}
	}

	/* determine if we ignore dir or not */
	if (!ignore_dir && path_drive && dir_drive) {
		if (towupper(path_drive) == towupper(dir_drive)) {
			/* exclude path drive letter to use dir */
			wpath_pos += 2;
			wpath_len -= 2;
		} else {
			/* ignore dir since path drive is different from dir drive */
			ignore_dir = 1;
			wdir_len = 0;
		}
	}

	if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
		/* ignore dir since path has UNC root */
		ignore_dir = 1;
		wdir_len = 0;
	} else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
		!dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
		/* ignore dir since path has root slash and dir doesn't have drive or UNC root */
		ignore_dir = 1;
		wdir_len = 0;
	}

	/* room for home, dir and path, one separator after each, plus the NUL */
	buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

	buffer = buffer_pos = (wchar_t *)malloc((buffer_len + 1) * sizeof(wchar_t));

	/* add home */
	if (whome_len) {
		wcsncpy(buffer_pos, whome, whome_len);
		buffer_pos += whome_len;
	}

	/* add separator if required */
	if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
		buffer_pos[0] = L'\\';
		buffer_pos++;
	}

	if (wdir_len) {
		/* tainted if dir is used and dir is tainted */
		if (!tainted && OBJ_TAINTED(dir))
			tainted = 1;

		wcsncpy(buffer_pos, wdir, wdir_len);
		buffer_pos += wdir_len;
	}

	/* add separator if required */
	if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
		buffer_pos[0] = L'\\';
		buffer_pos++;
	}

	/* now deal with path */
	if (wpath_len) {
		wcsncpy(buffer_pos, wpath_pos, wpath_len);
		buffer_pos += wpath_len;
	}

	/* GetFullPathNameW requires at least "." to determine current directory */
	if (wpath_len == 0) {
		buffer_pos[0] = L'.';
		buffer_pos++;
	}

	/* Ensure buffer is NUL terminated (wcsncpy above did not add one) */
	buffer_pos[0] = L'\0';

	/* tainted if path is relative */
	if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer))) {
		tainted = 1;
	}

	/* FIXME: Make this more robust */
	/* Determine required buffer size */
	size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
	if (size) {
		if (size > PATH_BUFFER_SIZE) {
			/* allocate enough memory to contain the response */
			/* NOTE(review): the result of this second call is not checked for 0 */
			wfullpath = (wchar_t *)malloc(size * sizeof(wchar_t));
			size = GetFullPathNameW(buffer, size, wfullpath, NULL);
		} else {
			wfullpath = wfullpath_buffer;
		}

		/* Remove any trailing slashes (but keep "X:\" intact) */
		if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
			wfullpath[size - 2] != L':' &&
			!(size == 2 && IS_DIR_UNC_P(wfullpath))) {
			size -= 1;
			wfullpath[size] = L'\0';
		}

		/* Remove any trailing dot */
		if (wfullpath[size - 1] == L'.') {
			size -= 1;
			wfullpath[size] = L'\0';
		}

		/* removes trailing invalid ':$DATA' */
		size = fenix_remove_invalid_alternative_data(wfullpath, size);

		/* sanitize backslashes with forwardslashes */
		fenix_replace_wchar(wfullpath, L'\\', L'/');

		/* convert to char * using the code page of the path encoding */
		size = WideCharToMultiByte(cp, 0, wfullpath, -1, NULL, 0, NULL, NULL);
		fullpath = (char *)malloc(size * sizeof(char));
		WideCharToMultiByte(cp, 0, wfullpath, -1, fullpath, size, NULL, NULL);

		/* convert to VALUE and set the path encoding (size counts the NUL) */
		result = rb_enc_str_new(fullpath, size - 1, path_encoding);

		/* makes the result object tainted if expanding tainted strings or returning modified path */
		if (tainted)
			OBJ_TAINT(result);
	}

	/* TODO: better cleanup */
	/* free(NULL) is a no-op, so pointers that were never set need no guard */
	free(buffer);
	free(wpath);
	free(wdir);
	free(whome);

	/* wfullpath may alias the on-stack buffer, which must not be freed */
	if (wfullpath != wfullpath_buffer)
		free(wfullpath);

	free(fullpath);

	return result;
}
Exemple #2
0
/*
 * Expand fname into an absolute Windows path, optionally relative to dname.
 *
 * fname     - path to expand; may be nil, in which case it is treated as an
 *             empty path.
 * dname     - base directory, or nil.
 * abs_mode  - when non-zero, a leading '~' is NOT expanded (neither in fname
 *             nor in dname).
 * long_name - when non-zero, the resolved path is passed through
 *             replace_to_long_name().
 * result    - String that receives the output: it is truncated to length 0
 *             and then filled through append_wstr().
 *
 * Behavior, in order:
 *   - A leading "~" (alone or followed by a separator) in fname is replaced
 *     by home_dir() and dname is ignored; the same expansion is applied to
 *     dname when dname is used.  Raises ArgumentError when no home is found
 *     or the home is relative.
 *   - A leading "~name" in fname or dname raises ArgumentError
 *     ("can't find user name").
 *   - dname is ignored when fname has a drive letter plus root slash, when
 *     both carry different drive letters, when fname has a UNC root, or when
 *     fname is rooted and dname has neither a drive letter nor a UNC root.
 *   - The combined string is resolved with GetFullPathNameW(); a trailing
 *     separator, a trailing dot and an invalid ':$DATA' suffix are removed
 *     and backslashes are turned into forward slashes.
 *
 * Returns the value produced by append_wstr(), associated with the path
 * encoding and tainted when fname is tainted, when "~" was expanded, when a
 * tainted dname was used, or when the assembled path is relative.
 *
 * Memory note: wpath and wdir come from mbstr_to_wstr() and are released
 * with free(); buffer, whome and a heap wfullpath are released with xfree().
 */
VALUE
rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
{
    size_t size = 0, whome_len = 0;
    size_t buffer_len = 0;
    long wpath_len = 0, wdir_len = 0;
    wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
    wchar_t *wdir = NULL, *wdir_pos = NULL;
    wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
    UINT path_cp, cp;
    VALUE path = fname, dir = dname;
    wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
    wchar_t path_drive = L'\0', dir_drive = L'\0';
    int ignore_dir = 0;
    rb_encoding *path_encoding;
    int tainted = 0;

    /* tainted if path is tainted */
    tainted = OBJ_TAINTED(path);

    /* get path encoding (and make sure dir is compatible when given) */
    if (NIL_P(dir)) {
	path_encoding = rb_enc_get(path);
    }
    else {
	path_encoding = rb_enc_check(path, dir);
    }

    cp = path_cp = code_page(path_encoding);

    /* workaround invalid codepage: convert through UTF-8 instead */
    if (path_cp == INVALID_CODE_PAGE) {
	cp = CP_UTF8;
	if (!NIL_P(path)) {
	    path = fix_string_encoding(path, path_encoding);
	}
    }

    /* convert char * to wchar_t; wpath_pos is the read cursor */
    if (!NIL_P(path)) {
	wpath = mbstr_to_wstr(cp, RSTRING_PTR(path), (int)RSTRING_LEN(path), &wpath_len);
	wpath_pos = wpath;
    }

    /* determine if we need the user's home directory */
    /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
    if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
	(wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
	/* tainted if expanding '~' */
	tainted = 1;

	whome = home_dir();
	if (whome == NULL) {
	    /* wpath is released with free(), matching the final cleanup */
	    free(wpath);
	    rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
	}
	whome_len = wcslen(whome);

	if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
	    free(wpath);
	    xfree(whome);
	    rb_raise(rb_eArgError, "non-absolute home");
	}

	if (path_cp == INVALID_CODE_PAGE || rb_enc_str_asciionly_p(path)) {
	    /* use filesystem encoding if expanding home dir */
	    path_encoding = rb_filesystem_encoding();
	    cp = path_cp = system_code_page();
	}

	/* ignores dir since we are expanding home */
	ignore_dir = 1;

	/* exclude ~ from the result */
	wpath_pos++;
	wpath_len--;

	/* exclude separator if present */
	if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
	    wpath_pos++;
	    wpath_len--;
	}
    }
    else if (wpath_len >= 2 && wpath_pos[1] == L':') {
	if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
	    /* ignore dir since path contains a drive letter and a root slash */
	    ignore_dir = 1;
	}
	else {
	    /* determine if we ignore dir or not later; */
	    /* the "X:" prefix is stripped here and re-added when building buffer */
	    path_drive = wpath_pos[0];
	    wpath_pos += 2;
	    wpath_len -= 2;
	}
    }
    else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
	/* "~user" form: not supported, report the user name in the error */
	result = rb_str_new_cstr("can't find user ");
	result = append_wstr(result, wpath_pos + 1, user_length_in_path(wpath_pos + 1, wpath_len - 1),
			     cp, path_cp, path_encoding);

	free(wpath);

	rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
    }

    /* convert dir */
    if (!ignore_dir && !NIL_P(dir)) {
	/* fix string encoding */
	if (path_cp == INVALID_CODE_PAGE) {
	    dir = fix_string_encoding(dir, path_encoding);
	}

	/* convert char * to wchar_t; wdir_pos is the read cursor */
	if (!NIL_P(dir)) {
	    wdir = mbstr_to_wstr(cp, RSTRING_PTR(dir), (int)RSTRING_LEN(dir), &wdir_len);
	    wdir_pos = wdir;
	}

	if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
	    (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
	    /* tainted if expanding '~' */
	    tainted = 1;

	    whome = home_dir();
	    if (whome == NULL) {
		free(wpath);
		free(wdir);
		rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
	    }
	    whome_len = wcslen(whome);

	    if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
		free(wpath);
		free(wdir);
		xfree(whome);
		rb_raise(rb_eArgError, "non-absolute home");
	    }

	    /* exclude ~ from the result */
	    wdir_pos++;
	    wdir_len--;

	    /* exclude separator if present */
	    if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
		wdir_pos++;
		wdir_len--;
	    }
	}
	else if (wdir_len >= 2 && wdir[1] == L':') {
	    dir_drive = wdir[0];
	    if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
		/* rooted path: keep only the "X:" part of dir */
		wdir_len = 2;
	    }
	}
	else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
	    /* UNC path */
	    if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
		/* cut the UNC path tail to '//host/share' */
		long separators = 0;
		long pos = 2;
		while (pos < wdir_len && separators < 2) {
		    if (IS_DIR_SEPARATOR_P(wdir[pos])) {
			separators++;
		    }
		    pos++;
		}
		if (separators == 2)
		    wdir_len = pos - 1;
	    }
	}
	else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
	    /* "~user" form in dir: not supported */
	    result = rb_str_new_cstr("can't find user ");
	    result = append_wstr(result, wdir_pos + 1, user_length_in_path(wdir_pos + 1, wdir_len - 1),
				 cp, path_cp, path_encoding);

	    free(wpath);
	    free(wdir);

	    rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
	}
    }

    /* determine if we ignore dir or not */
    if (!ignore_dir && path_drive && dir_drive) {
	if (towupper(path_drive) != towupper(dir_drive)) {
	    /* ignore dir since path drive is different from dir drive */
	    ignore_dir = 1;
	    wdir_len = 0;
	    dir_drive = 0;
	}
    }

    if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
	/* ignore dir since path has UNC root */
	ignore_dir = 1;
	wdir_len = 0;
    }
    else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
	     !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
	/* ignore dir since path has root slash and dir doesn't have drive or UNC root */
	ignore_dir = 1;
	wdir_len = 0;
    }

    /* room for home, dir and path, one extra slot after each, plus the NUL */
    buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

    buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));

    /* add home */
    if (whome_len) {
	wcsncpy(buffer_pos, whome, whome_len);
	buffer_pos += whome_len;
    }

    /* Add separator if required */
    if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
	buffer_pos[0] = L'\\';
	buffer_pos++;
    }
    else if (!dir_drive && path_drive) {
	/* dir supplies no drive: put back the "X:" stripped from path earlier */
	*buffer_pos++ = path_drive;
	*buffer_pos++ = L':';
    }

    if (wdir_len) {
	/* tainted if dir is used and dir is tainted */
	if (!tainted && OBJ_TAINTED(dir))
	    tainted = 1;

	wcsncpy(buffer_pos, wdir_pos, wdir_len);
	buffer_pos += wdir_len;
    }

    /* add separator if required */
    if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
	buffer_pos[0] = L'\\';
	buffer_pos++;
    }

    /* now deal with path */
    if (wpath_len) {
	wcsncpy(buffer_pos, wpath_pos, wpath_len);
	buffer_pos += wpath_len;
    }

    /* GetFullPathNameW requires at least "." to determine current directory */
    if (wpath_len == 0) {
	buffer_pos[0] = L'.';
	buffer_pos++;
    }

    /* Ensure buffer is NUL terminated (wcsncpy above did not add one) */
    buffer_pos[0] = L'\0';

    /* tainted if path is relative */
    if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
	tainted = 1;

    /* FIXME: Make this more robust */
    /* NOTE(review): a return value of 0 (failure) is not handled here, and */
    /* the code below indexes wfullpath[size - 1] -- confirm it cannot occur */
    /* Determine required buffer size */
    size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
    if (size > PATH_BUFFER_SIZE) {
	/* allocate more memory than allotted originally by PATH_BUFFER_SIZE */
	wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
	size = GetFullPathNameW(buffer, size, wfullpath, NULL);
    }
    else {
	wfullpath = wfullpath_buffer;
    }

    /* Remove any trailing slashes (but keep "X:\" intact) */
    if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
	wfullpath[size - 2] != L':' &&
	!(size == 2 && IS_DIR_UNC_P(wfullpath))) {
	size -= 1;
	wfullpath[size] = L'\0';
    }

    /* Remove any trailing dot */
    if (wfullpath[size - 1] == L'.') {
	size -= 1;
	wfullpath[size] = L'\0';
    }

    /* removes trailing invalid ':$DATA' */
    size = remove_invalid_alternative_data(wfullpath, size);

    /* Replace the trailing path to long name; the last argument tells the */
    /* helper whether wfullpath currently lives on the heap */
    if (long_name)
	size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));

    /* sanitize backslashes with forwardslashes */
    replace_wchar(wfullpath, L'\\', L'/');

    /* convert to VALUE and set the path encoding */
    rb_str_set_len(result, 0);
    result = append_wstr(result, wfullpath, size, cp, path_cp, path_encoding);

    /* makes the result object tainted if expanding tainted strings or returning modified path */
    if (tainted)
	OBJ_TAINT(result);

    /* TODO: better cleanup */
    if (buffer)
	xfree(buffer);

    /* wpath and wdir come from mbstr_to_wstr(); free(NULL) is a no-op */
    free(wpath);
    free(wdir);

    if (whome)
	xfree(whome);

    /* wfullpath may alias the on-stack buffer, which must not be freed */
    if (wfullpath != wfullpath_buffer)
	xfree(wfullpath);

    rb_enc_associate(result, path_encoding);
    return result;
}