/* Return code page number of the encoding. Cache code page into a hash for performance since finding the code page in Encoding#names is slow. */ static UINT fenix_code_page(rb_encoding *enc) { VALUE code_page_value, name_key; VALUE encoding, names_ary = Qundef, name; char *enc_name; struct RString fake_str; ID names; long i; if (!enc) return system_code_page(); enc_name = (char *)rb_enc_name(enc); fake_str.basic.flags = T_STRING|RSTRING_NOEMBED; fake_str.basic.klass = rb_cString; fake_str.as.heap.len = strlen(enc_name); fake_str.as.heap.ptr = enc_name; fake_str.as.heap.aux.capa = fake_str.as.heap.len; name_key = (VALUE)&fake_str; ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT); OBJ_FREEZE(name_key); code_page_value = rb_hash_lookup(rb_code_page, name_key); if (code_page_value != Qnil) { // printf("cached code page: %i\n", FIX2INT(code_page_value)); if (FIX2INT(code_page_value) == -1) { return system_code_page(); } else { return (UINT)FIX2INT(code_page_value); } } name_key = rb_usascii_str_new2(enc_name); encoding = rb_enc_from_encoding(enc); if (!NIL_P(encoding)) { CONST_ID(names, "names"); names_ary = rb_funcall(encoding, names, 0); } if (names_ary != Qundef) { for (i = 0; i < RARRAY_LEN(names_ary); i++) { name = RARRAY_PTR(names_ary)[i]; if (strncmp("CP", RSTRING_PTR(name), 2) == 0) { int code_page = atoi(RSTRING_PTR(name) + 2); rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page)); return (UINT)code_page; } } } rb_hash_aset(rb_code_page, name_key, INT2FIX(-1)); return system_code_page(); }
/* Return code page number of the encoding. Cache code page into a hash for performance since finding the code page in Encoding#names is slow. */ static UINT code_page(rb_encoding *enc) { VALUE code_page_value, name_key; VALUE encoding, names_ary = Qundef, name; char *enc_name; struct RString fake_str; ID names; long i; if (!enc) return system_code_page(); enc_name = (char *)rb_enc_name(enc); fake_str.basic.flags = T_STRING|RSTRING_NOEMBED; fake_str.basic.klass = rb_cString; fake_str.as.heap.len = strlen(enc_name); fake_str.as.heap.ptr = enc_name; fake_str.as.heap.aux.capa = fake_str.as.heap.len; name_key = (VALUE)&fake_str; ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT); code_page_value = rb_hash_lookup(rb_code_page, name_key); if (code_page_value != Qnil) return (UINT)FIX2INT(code_page_value); name_key = rb_usascii_str_new2(enc_name); encoding = rb_enc_from_encoding(enc); if (!NIL_P(encoding)) { CONST_ID(names, "names"); names_ary = rb_funcall(encoding, names, 0); } /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii) */ if (enc == rb_usascii_encoding() || enc == rb_ascii8bit_encoding()) { UINT code_page = 1252; rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page)); return code_page; } if (names_ary != Qundef) { for (i = 0; i < RARRAY_LEN(names_ary); i++) { name = RARRAY_PTR(names_ary)[i]; if (strncmp("CP", RSTRING_PTR(name), 2) == 0) { int code_page = atoi(RSTRING_PTR(name) + 2); if (code_page != 0) { rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page)); return (UINT)code_page; } } } } rb_hash_aset(rb_code_page, name_key, INT2FIX(INVALID_CODE_PAGE)); return INVALID_CODE_PAGE; }
/* Return code page number of the encoding. Cache code page into a hash for performance since finding the code page in Encoding#names is slow. */ static UINT code_page(rb_encoding *enc) { int enc_idx; if (!enc) return system_code_page(); enc_idx = rb_enc_to_index(enc); /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii) */ if (enc_idx == rb_usascii_encindex() || enc_idx == rb_ascii8bit_encindex()) { return 1252; } if (enc_idx == rb_utf8_encindex()) { return CP_UTF8; } if (0 <= enc_idx && (unsigned int)enc_idx < rb_code_page.count) return rb_code_page.table[enc_idx]; return INVALID_CODE_PAGE; }
VALUE rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result) { size_t size = 0, whome_len = 0; size_t buffer_len = 0; long wpath_len = 0, wdir_len = 0; char *fullpath = NULL; wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL; wchar_t *wdir = NULL, *wdir_pos = NULL; wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL; UINT path_cp, cp; VALUE path = fname, dir = dname; wchar_t wfullpath_buffer[PATH_BUFFER_SIZE]; wchar_t path_drive = L'\0', dir_drive = L'\0'; int ignore_dir = 0; rb_encoding *path_encoding; int tainted = 0; /* tainted if path is tainted */ tainted = OBJ_TAINTED(path); /* get path encoding */ if (NIL_P(dir)) { path_encoding = rb_enc_get(path); } else { path_encoding = rb_enc_check(path, dir); } cp = path_cp = code_page(path_encoding); /* workaround invalid codepage */ if (path_cp == INVALID_CODE_PAGE) { cp = CP_UTF8; if (!NIL_P(path)) { path = fix_string_encoding(path, path_encoding); } } /* convert char * to wchar_t */ if (!NIL_P(path)) { wpath = mbstr_to_wstr(cp, RSTRING_PTR(path), (int)RSTRING_LEN(path), &wpath_len); wpath_pos = wpath; } /* determine if we need the user's home directory */ /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */ if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' && (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) { /* tainted if expanding '~' */ tainted = 1; whome = home_dir(); if (whome == NULL) { xfree(wpath); rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'"); } whome_len = wcslen(whome); if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) { xfree(wpath); xfree(whome); rb_raise(rb_eArgError, "non-absolute home"); } if (path_cp == INVALID_CODE_PAGE || rb_enc_str_asciionly_p(path)) { /* use filesystem encoding if expanding home dir */ path_encoding = rb_filesystem_encoding(); cp = path_cp = system_code_page(); } /* ignores dir since we are expanding home */ ignore_dir = 1; /* exclude ~ from the result */ wpath_pos++; wpath_len--; /* exclude separator if present */ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { wpath_pos++; wpath_len--; } } else if (wpath_len >= 2 && wpath_pos[1] == L':') { if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) { /* ignore dir since path contains a drive letter and a root slash */ ignore_dir = 1; } else { /* determine if we ignore dir or not later */ path_drive = wpath_pos[0]; wpath_pos += 2; wpath_len -= 2; } } else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') { result = rb_str_new_cstr("can't find user "); result = append_wstr(result, wpath_pos + 1, user_length_in_path(wpath_pos + 1, wpath_len - 1), cp, path_cp, path_encoding); if (wpath) xfree(wpath); rb_exc_raise(rb_exc_new_str(rb_eArgError, result)); } /* convert dir */ if (!ignore_dir && !NIL_P(dir)) { /* fix string encoding */ if (path_cp == INVALID_CODE_PAGE) { dir = fix_string_encoding(dir, path_encoding); } /* convert char * to wchar_t */ if (!NIL_P(dir)) { wdir = mbstr_to_wstr(cp, RSTRING_PTR(dir), (int)RSTRING_LEN(dir), &wdir_len); wdir_pos = wdir; } if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' && (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) { /* tainted if expanding '~' */ tainted = 1; whome = home_dir(); if (whome == NULL) { free(wpath); free(wdir); rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'"); } whome_len = wcslen(whome); if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) { free(wpath); free(wdir); xfree(whome); rb_raise(rb_eArgError, "non-absolute home"); } /* exclude ~ from the result */ wdir_pos++; wdir_len--; /* exclude separator if present */ if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) { wdir_pos++; wdir_len--; } } else if (wdir_len >= 2 && wdir[1] == L':') { dir_drive = wdir[0]; if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { wdir_len = 2; } } else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) { /* UNC path */ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { /* cut the UNC path tail to '//host/share' */ long separators = 0; long pos = 2; while (pos < wdir_len && separators < 2) { if (IS_DIR_SEPARATOR_P(wdir[pos])) { separators++; } pos++; } if (separators == 2) wdir_len = pos - 1; } } else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') { result = rb_str_new_cstr("can't find user "); result = append_wstr(result, wdir_pos + 1, user_length_in_path(wdir_pos + 1, wdir_len - 1), cp, path_cp, path_encoding); if (wpath) free(wpath); if (wdir) free(wdir); rb_exc_raise(rb_exc_new_str(rb_eArgError, result)); } } /* determine if we ignore dir or not */ if (!ignore_dir && path_drive && dir_drive) { if (towupper(path_drive) != towupper(dir_drive)) { /* ignore dir since path drive is different from dir drive */ ignore_dir = 1; wdir_len = 0; dir_drive = 0; } } if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) { /* ignore dir since path has UNC root */ ignore_dir = 1; wdir_len = 0; } else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) && !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) { /* ignore dir since path has root slash and dir doesn't have drive or UNC root */ ignore_dir = 1; wdir_len = 0; } buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1; buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t)); /* add home */ if (whome_len) { wcsncpy(buffer_pos, whome, whome_len); buffer_pos += whome_len; } /* Add separator if required */ if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { buffer_pos[0] = L'\\'; buffer_pos++; } else if (!dir_drive && path_drive) { *buffer_pos++ = path_drive; *buffer_pos++ = L':'; } if (wdir_len) { /* tainted if dir is used and dir is tainted */ if (!tainted && OBJ_TAINTED(dir)) tainted = 1; wcsncpy(buffer_pos, wdir_pos, wdir_len); buffer_pos += wdir_len; } /* add separator if required */ if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { buffer_pos[0] = L'\\'; buffer_pos++; } /* now deal with path */ if (wpath_len) { wcsncpy(buffer_pos, wpath_pos, wpath_len); buffer_pos += wpath_len; } /* GetFullPathNameW requires at least "." to determine current directory */ if (wpath_len == 0) { buffer_pos[0] = L'.'; buffer_pos++; } /* Ensure buffer is NULL terminated */ buffer_pos[0] = L'\0'; /* tainted if path is relative */ if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer))) tainted = 1; /* FIXME: Make this more robust */ /* Determine require buffer size */ size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL); if (size > PATH_BUFFER_SIZE) { /* allocate more memory than alloted originally by PATH_BUFFER_SIZE */ wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t)); size = GetFullPathNameW(buffer, size, wfullpath, NULL); } else { wfullpath = wfullpath_buffer; } /* Remove any trailing slashes */ if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) && wfullpath[size - 2] != L':' && !(size == 2 && IS_DIR_UNC_P(wfullpath))) { size -= 1; wfullpath[size] = L'\0'; } /* Remove any trailing dot */ if (wfullpath[size - 1] == L'.') { size -= 1; wfullpath[size] = L'\0'; } /* removes trailing invalid ':$DATA' */ size = remove_invalid_alternative_data(wfullpath, size); /* Replace the trailing path to long name */ if (long_name) size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer)); /* sanitize backslashes with forwardslashes */ replace_wchar(wfullpath, L'\\', L'/'); /* convert to VALUE and set the path encoding */ rb_str_set_len(result, 0); result = append_wstr(result, wfullpath, size, cp, path_cp, path_encoding); /* makes the result object tainted if expanding tainted strings or returning modified path */ if (tainted) OBJ_TAINT(result); /* TODO: better cleanup */ if (buffer) xfree(buffer); if (wpath) free(wpath); if (wdir) free(wdir); if (whome) xfree(whome); if (wfullpath != wfullpath_buffer) xfree(wfullpath); if (fullpath) xfree(fullpath); rb_enc_associate(result, path_encoding); return result; }