/*
 * Extract the user name that starts `offset` wide characters into *wpath and
 * return it as a Ruby string.
 *
 * The name ends at the first directory separator or at the end of the
 * string; that position is overwritten with NUL, so the caller's buffer is
 * modified in place.
 *
 * wpath         - address of the wide-character path pointer
 * offset        - number of wide characters to skip before the name
 * cp            - code page handed to convert_wchar_to_mb
 * path_cp       - code page of the path encoding; when it is
 *                 INVALID_CODE_PAGE the converted bytes are treated as UTF-8
 *                 and transcoded to path_encoding
 * path_encoding - encoding of the returned string
 */
static inline VALUE
get_user_from_path(wchar_t **wpath, int offset, UINT cp, UINT path_cp, rb_encoding *path_encoding)
{
    wchar_t *name_start = *wpath + offset;
    wchar_t *name_end;
    char *user;
    size_t size;
    VALUE result;

    /* walk to the separator (or terminator) that ends the user name */
    for (name_end = name_start;
         !IS_DIR_SEPARATOR_P(*name_end) && *name_end != '\0';
         name_end++) {
        /* nothing to do: the loop header performs the scan */
    }
    *name_end = '\0';

    convert_wchar_to_mb(name_start, &user, &size, cp);

    if (path_cp != INVALID_CODE_PAGE) {
        /* the bytes are already in the path encoding */
        result = rb_enc_str_new(user, size, path_encoding);
    }
    else {
        /* label the bytes as UTF-8, then transcode to the path encoding */
        VALUE utf8_name = rb_enc_str_new(user, size, rb_utf8_encoding());

        result = rb_str_encode(utf8_name, rb_enc_from_encoding(path_encoding), 0, Qnil);
        /* release the temporary's buffer right away */
        rb_str_resize(utf8_name, 0);
    }

    if (user)
        xfree(user);

    return result;
}
/*
 * Return a UTF-8 transcoded copy of `str`, interpreting its bytes as being
 * in `encoding`.
 *
 * A new string is built from the bytes of `str` and tagged with `encoding`;
 * that string is then encoded to UTF-8. `str` itself is not modified.
 */
static inline VALUE
fix_string_encoding(VALUE str, rb_encoding *encoding)
{
    VALUE relabeled = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), encoding);
    VALUE utf8 = rb_enc_from_encoding(rb_utf8_encoding());

    return rb_str_encode(relabeled, utf8, 0, Qnil);
}
/*
 * Return `rb_string` in the Ruby encoding that corresponds to
 * context->encoding.
 *
 * When the string's encoding index already matches the target, or when the
 * build has no HAVE_RUBY_ENCODING_H, the string is returned untouched.
 * Otherwise the result of rb_str_encode is returned.
 */
VALUE
rb_grn_context_rb_string_encode (grn_ctx *context, VALUE rb_string)
{
#ifdef HAVE_RUBY_ENCODING_H
    rb_encoding *current = rb_enc_get(rb_string);
    rb_encoding *wanted = rb_grn_encoding_to_ruby_encoding(context->encoding);

    if (rb_enc_to_index(current) == rb_enc_to_index(wanted))
        return rb_string;

    return rb_str_encode(rb_string, rb_enc_from_encoding(wanted), 0, Qnil);
#else
    return rb_string;
#endif
}
/*
 * Document-method: String#to_msgpack
 *
 * call-seq:
 *   string.to_msgpack(out = '') -> String
 *
 * Serializes the String into raw bytes.
 *
 * When encodings are available, a string whose encoding is none of UTF-8,
 * ASCII-8BIT or US-ASCII and whose content is not ASCII-only is transcoded
 * to UTF-8 before being packed.
 */
static VALUE MessagePack_String_to_msgpack(int argc, VALUE *argv, VALUE self)
{
	ARG_BUFFER(out, argc, argv);
#ifdef COMPAT_HAVE_ENCODING
	int enc = ENCODING_GET(self);
	int packable_as_is = (enc == s_enc_utf8
			|| enc == s_enc_ascii8bit
			|| enc == s_enc_usascii);

	/* the code range is only consulted for the "other" encodings */
	if(!packable_as_is && !ENC_CODERANGE_ASCIIONLY(self)) {
		self = rb_str_encode(self, s_enc_utf8_value, 0, Qnil);
	}
#endif
	/* length header first, then the payload bytes */
	msgpack_pack_raw(out, RSTRING_LEN(self));
	msgpack_pack_raw_body(out, RSTRING_PTR(self), RSTRING_LEN(self));
	return out;
}
/*
 * Encode `value` for storage in a field of the given type and freeze the
 * encoded string.
 *
 * UPB_TYPE_STRING fields are encoded with kRubyStringUtf8Encoding; every
 * other type uses kRubyString8bitEncoding. For UPB_TYPE_STRING an
 * EncodingError is raised when the encoded string's code range is broken.
 *
 * Returns the encoded, frozen string.
 */
VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
  const int is_text_field = (type == UPB_TYPE_STRING);
  rb_encoding* target_encoding =
      is_text_field ? kRubyStringUtf8Encoding : kRubyString8bitEncoding;

  /* Note: this will not duplicate underlying string data unless necessary. */
  value = rb_str_encode(value, rb_enc_from_encoding(target_encoding), 0, Qnil);

  if (is_text_field) {
    if (rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
      rb_raise(rb_eEncodingError, "String is invalid UTF-8");
    }
  }

  /* Freeze the string produced by the encode call above (not the object the
   * caller assigned) so the stored data remains valid. */
  rb_obj_freeze(value);
  return value;
}
/*
 * TermInfo.wcswidth(str)
 *
 * TermInfo.wcswidth returns the number of columns of str,
 * according to current locale.
 *
 * Raises ArgumentError when str contains a NUL, a byte sequence that is
 * invalid or incomplete in the locale's multibyte encoding, or a character
 * for which wcwidth() reports no width.
 */
static VALUE
rt_wcswidth(VALUE self, VALUE str)
{
  char *s;
  size_t l, r;
  mbstate_t mbs;
  wchar_t wc;
  long cols;
  int width;

#ifdef HAVE_RUBY_ENCODING_H
  /* The encoding of str is assumed to be the locale encoding on Ruby 1.8. */
  str = rb_str_encode(str, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil);
#endif

  memset(&mbs,0,sizeof(mbstate_t));

  s = StringValueCStr(str);
  l = RSTRING_LEN(str);

  cols = 0;
  while (0 < l) {
    r = mbrtowc(&wc, s, l, &mbs);
    if (r == 0)
      rb_raise(rb_eArgError, "NUL found");
    /*
     * (size_t)-1 reports an invalid sequence and (size_t)-2 an incomplete
     * one. Neither stores a character in wc, and using r as a byte count
     * would move s and l out of the string.
     */
    if (r == (size_t)-1 || r == (size_t)-2)
      rb_raise(rb_eArgError, "invalid multibyte sequence found");
    width = wcwidth(wc);
    if (width == -1)
      rb_raise(rb_eArgError, "non-printable charactor found");
    cols += width;
    l -= r;
    s += r;
  }

  return LONG2NUM(cols);
}
/**
 * Encode a ruby string as UTF-8 and tag the result as binary.
 *
 * The string is first passed through rb_str_encode with the UTF-8 target,
 * and the encoded string is then associated with the ASCII-8BIT encoding.
 *
 * @example Convert the string to utf-8 binary.
 *    rb_bson_to_utf8_binary("test");
 *
 * @param [ String ] string The ruby string.
 *
 * @return [ String ] The encoded string.
 *
 * @since 2.0.0
 */
static VALUE rb_bson_to_utf8_binary(VALUE string)
{
  VALUE transcoded = rb_str_encode(string, rb_bson_utf8_string, 0, Qnil);
  rb_encoding *binary = rb_ascii8bit_encoding();

  return rb_enc_associate(transcoded, binary);
}
/* call-seq: stmt.bind_param(key, value)
 *
 * Binds value to the named (or positional) placeholder. If +key+ is a
 * Symbol or String, it is used as the name of the placeholder to bind to
 * (a leading ':' is added when missing). Otherwise it is converted to an
 * integer and treated as an index for a positional placeholder.
 *
 * Strings are bound as blobs when they are SQLite3::Blob instances or
 * ASCII-8BIT encoded, as UTF-16 text when UTF-16LE/BE encoded, and as UTF-8
 * text otherwise (transcoding first when the string is neither UTF-8 nor
 * US-ASCII). Bignums that fit in 64 bits are bound as integers, otherwise
 * as doubles.
 *
 * Raises SQLite3::Exception when no placeholder matches +key+ and
 * RuntimeError when +value+ has an unsupported type.
 *
 * See also #bind_params.
 */
static VALUE bind_param(VALUE self, VALUE key, VALUE value)
{
  sqlite3StmtRubyPtr ctx;
  int status;
  int index;

  Data_Get_Struct(self, sqlite3StmtRuby, ctx);
  REQUIRE_OPEN_STMT(ctx);

  switch(TYPE(key)) {
    case T_SYMBOL:
      key = rb_funcall(key, rb_intern("to_s"), 0);
      /* fall through: look the name up as a string */
    case T_STRING:
      if(RSTRING_PTR(key)[0] != ':') key = rb_str_plus(rb_str_new2(":"), key);
      index = sqlite3_bind_parameter_index(ctx->st, StringValuePtr(key));
      break;
    default:
      index = (int)NUM2INT(key);
  }

  if(index == 0)
    rb_raise(rb_path2class("SQLite3::Exception"), "no such bind parameter");

  switch(TYPE(value)) {
    case T_STRING:
      if(CLASS_OF(value) == cSqlite3Blob
              || rb_enc_get_index(value) == rb_ascii8bit_encindex()
        ) {
        status = sqlite3_bind_blob(
            ctx->st,
            index,
            (const char *)StringValuePtr(value),
            (int)RSTRING_LEN(value),
            SQLITE_TRANSIENT
            );
      } else {
        if (UTF16_LE_P(value) || UTF16_BE_P(value)) {
          status = sqlite3_bind_text16(
              ctx->st,
              index,
              (const char *)StringValuePtr(value),
              (int)RSTRING_LEN(value),
              SQLITE_TRANSIENT
              );
        } else {
          /*
           * Transcode only when the string is neither UTF-8 nor US-ASCII.
           * The previous `||` was always true because a string cannot have
           * both encodings at once, so every text value was re-encoded.
           */
          if (!UTF8_P(value) && !USASCII_P(value)) {
            value = rb_str_encode(value, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil);
          }
          status = sqlite3_bind_text(
              ctx->st,
              index,
              (const char *)StringValuePtr(value),
              (int)RSTRING_LEN(value),
              SQLITE_TRANSIENT
              );
        }
      }
      break;
    case T_BIGNUM: {
      sqlite3_int64 num64;
      if (bignum_to_int64(value, &num64)) {
          status = sqlite3_bind_int64(ctx->st, index, num64);
          break;
      }
    }
      /* fall through: a bignum that does not fit in 64 bits binds as double */
    case T_FLOAT:
      status = sqlite3_bind_double(ctx->st, index, NUM2DBL(value));
      break;
    case T_FIXNUM:
      status = sqlite3_bind_int64(ctx->st, index, (sqlite3_int64)FIX2LONG(value));
      break;
    case T_NIL:
      status = sqlite3_bind_null(ctx->st, index);
      break;
    default:
      rb_raise(rb_eRuntimeError, "can't prepare %s",
          rb_class2name(CLASS_OF(value)));
      break;
  }

  CHECK(sqlite3_db_handle(ctx->st), status);

  return self;
}
/*
 * Expand `fname` into a full path, using `dname` as the base directory when
 * it is given and applicable, and store the bytes in `result`.
 *
 * fname     - path to expand
 * dname     - base directory, or nil
 * abs_mode  - when 0, a leading "~" (alone or followed by a separator) in
 *             the path or the directory is replaced by the home directory,
 *             and "~user" raises ArgumentError; when non-zero "~" is left
 *             alone
 * long_name - when non-zero the full path is passed through
 *             replace_to_long_name
 * result    - string that receives the expanded path; resized as needed
 *
 * Returns `result`, associated with the path encoding (the filesystem
 * encoding when the home directory was expanded) and tainted when the
 * inputs were tainted, "~" was expanded, or the joined path was relative.
 *
 * Raises ArgumentError when the home directory cannot be found, when it is
 * not absolute, or when a "~user" form is used.
 */
VALUE
rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
{
    size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
    size_t buffer_len = 0;
    size_t wfullpath_len = 0;
    wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
    wchar_t *wdir = NULL, *wdir_pos = NULL;
    wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
    UINT path_cp, cp;
    VALUE path = fname, dir = dname;
    wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
    wchar_t path_drive = L'\0', dir_drive = L'\0';
    int ignore_dir = 0;
    rb_encoding *path_encoding;
    int tainted = 0;

    /* tainted if path is tainted */
    tainted = OBJ_TAINTED(path);

    /* get path encoding */
    if (NIL_P(dir)) {
        path_encoding = rb_enc_get(path);
    }
    else {
        path_encoding = rb_enc_check(path, dir);
    }

    cp = path_cp = code_page(path_encoding);

    /* workaround invalid codepage */
    if (path_cp == INVALID_CODE_PAGE) {
        cp = CP_UTF8;
        if (!NIL_P(path)) {
            path = fix_string_encoding(path, path_encoding);
        }
    }

    /* convert char * to wchar_t */
    convert_mb_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);

    /* determine if we need the user's home directory */
    /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
    if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
        (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
        /* tainted if expanding '~' */
        tainted = 1;

        whome = home_dir();
        if (whome == NULL) {
            xfree(wpath);
            rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
        }
        whome_len = wcslen(whome);

        if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
            xfree(wpath);
            /* release the home buffer too; rb_raise does not return */
            xfree(whome);
            rb_raise(rb_eArgError, "non-absolute home");
        }

        /* use filesystem encoding if expanding home dir */
        path_encoding = rb_filesystem_encoding();
        cp = path_cp = system_code_page();

        /* ignores dir since we are expanding home */
        ignore_dir = 1;

        /* exclude ~ from the result */
        wpath_pos++;
        wpath_len--;

        /* exclude separator if present */
        if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
            wpath_pos++;
            wpath_len--;
        }
    }
    else if (wpath_len >= 2 && wpath_pos[1] == L':') {
        if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
            /* ignore dir since path contains a drive letter and a root slash */
            ignore_dir = 1;
        }
        else {
            /* determine if we ignore dir or not later */
            path_drive = wpath_pos[0];
        }
    }
    else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
        /* "~user" is not supported: report the user name and give up */
        result = get_user_from_path(&wpath_pos, 1, cp, path_cp, path_encoding);
        if (wpath)
            xfree(wpath);
        rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
    }

    /* convert dir */
    if (!ignore_dir && !NIL_P(dir)) {
        /* fix string encoding */
        if (path_cp == INVALID_CODE_PAGE) {
            dir = fix_string_encoding(dir, path_encoding);
        }

        /* convert char * to wchar_t */
        convert_mb_to_wchar(dir, &wdir, &wdir_pos, &wdir_len, cp);

        if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
            (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
            /* tainted if expanding '~' */
            tainted = 1;

            whome = home_dir();
            if (whome == NULL) {
                xfree(wpath);
                xfree(wdir);
                rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
            }
            whome_len = wcslen(whome);

            if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
                xfree(wpath);
                xfree(wdir);
                /* release the home buffer too; rb_raise does not return */
                xfree(whome);
                rb_raise(rb_eArgError, "non-absolute home");
            }

            /* exclude ~ from the result */
            wdir_pos++;
            wdir_len--;

            /* exclude separator if present */
            if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
                wdir_pos++;
                wdir_len--;
            }
        }
        else if (wdir_len >= 2 && wdir[1] == L':') {
            dir_drive = wdir[0];
            /* a rooted path only borrows the drive ("X:") from dir */
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                wdir_len = 2;
            }
        }
        else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
            /* UNC path */
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                /* cut the UNC path tail to '//host/share' */
                size_t separators = 0;
                size_t pos = 2;
                while (pos < wdir_len && separators < 2) {
                    if (IS_DIR_SEPARATOR_P(wdir[pos])) {
                        separators++;
                    }
                    pos++;
                }
                if (separators == 2)
                    wdir_len = pos - 1;
            }
        }
        else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
            /* "~user" is not supported: report the user name and give up */
            result = get_user_from_path(&wdir_pos, 1, cp, path_cp, path_encoding);
            if (wpath)
                xfree(wpath);
            if (wdir)
                xfree(wdir);
            rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
        }
    }

    /* determine if we ignore dir or not */
    if (!ignore_dir && path_drive && dir_drive) {
        if (towupper(path_drive) == towupper(dir_drive)) {
            /* exclude path drive letter to use dir */
            wpath_pos += 2;
            wpath_len -= 2;
        }
        else {
            /* ignore dir since path drive is different from dir drive */
            ignore_dir = 1;
            wdir_len = 0;
        }
    }

    if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
        /* ignore dir since path has UNC root */
        ignore_dir = 1;
        wdir_len = 0;
    }
    else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
             !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
        /* ignore dir since path has root slash and dir doesn't have drive or UNC root */
        ignore_dir = 1;
        wdir_len = 0;
    }

    /* room for home, dir and path, each with a possible separator */
    buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

    buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));

    /* add home */
    if (whome_len) {
        wcsncpy(buffer_pos, whome, whome_len);
        buffer_pos += whome_len;
    }

    /* Add separator if required */
    if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }

    if (wdir_len) {
        /* tainted if dir is used and dir is tainted */
        if (!tainted && OBJ_TAINTED(dir))
            tainted = 1;

        wcsncpy(buffer_pos, wdir_pos, wdir_len);
        buffer_pos += wdir_len;
    }

    /* add separator if required */
    if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }

    /* now deal with path */
    if (wpath_len) {
        wcsncpy(buffer_pos, wpath_pos, wpath_len);
        buffer_pos += wpath_len;
    }

    /* GetFullPathNameW requires at least "." to determine current directory */
    if (wpath_len == 0) {
        buffer_pos[0] = L'.';
        buffer_pos++;
    }

    /* Ensure buffer is NULL terminated */
    buffer_pos[0] = L'\0';

    /* tainted if path is relative */
    if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
        tainted = 1;

    /* FIXME: Make this more robust */
    /* Determine require buffer size */
    size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
    if (size > PATH_BUFFER_SIZE) {
        /* allocate more memory than allotted originally by PATH_BUFFER_SIZE */
        wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
        size = GetFullPathNameW(buffer, size, wfullpath, NULL);
    }
    else {
        wfullpath = wfullpath_buffer;
    }

    /* Remove any trailing slashes */
    if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
        wfullpath[size - 2] != L':' &&
        !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* Remove any trailing dot */
    if (wfullpath[size - 1] == L'.') {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* removes trailing invalid ':$DATA' */
    size = remove_invalid_alternative_data(wfullpath, size);

    /* Replace the trailing path to long name */
    if (long_name)
        size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));

    /* sanitize backslashes with forwardslashes */
    replace_wchar(wfullpath, L'\\', L'/');

    /*
     * convert to char *
     *
     * Keep the wide-character count separate from the byte count: the first
     * call measures the multibyte length, and the second must still be told
     * how many wide characters to read.
     */
    wfullpath_len = size;
    size = WideCharToMultiByte(cp, 0, wfullpath, wfullpath_len, NULL, 0, NULL, NULL);
    if (size > (size_t)RSTRING_LEN(result)) {
        rb_str_modify(result);
        rb_str_resize(result, size);
    }

    WideCharToMultiByte(cp, 0, wfullpath, wfullpath_len, RSTRING_PTR(result), size, NULL, NULL);
    rb_str_set_len(result, size);

    /* convert to VALUE and set the path encoding */
    if (path_cp == INVALID_CODE_PAGE) {
        VALUE tmp;
        size_t len;

        /* the bytes were produced as UTF-8; transcode them back */
        rb_enc_associate(result, rb_utf8_encoding());
        ENC_CODERANGE_CLEAR(result);
        tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil);
        len = RSTRING_LEN(tmp);
        rb_str_modify(result);
        rb_str_resize(result, len);
        memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len);
        rb_str_resize(tmp, 0);
    }
    rb_enc_associate(result, path_encoding);
    ENC_CODERANGE_CLEAR(result);

    /* makes the result object tainted if expanding tainted strings or returning modified path */
    if (tainted)
        OBJ_TAINT(result);

    /* TODO: better cleanup */
    if (buffer)
        xfree(buffer);

    if (wpath)
        xfree(wpath);

    if (wdir)
        xfree(wdir);

    if (whome)
        xfree(whome);

    if (wfullpath && wfullpath != wfullpath_buffer)
        xfree(wfullpath);

    return result;
}
/*
 * Spec helper: forward to rb_str_encode, converting the Ruby-level `flags`
 * Fixnum to a C int first. `self` is unused.
 */
VALUE string_spec_rb_str_encode(VALUE self, VALUE str, VALUE enc, VALUE flags, VALUE opts) {
  int ecflags = FIX2INT(flags);

  return rb_str_encode(str, enc, ecflags, opts);
}