/*
 * Convert the characters of a string object into a UTF-8 buffer.
 *
 * The character count and the 16-bit character array are read from the
 * string object, a buffer of utf8CharLen(chars, count) + 1 bytes is obtained
 * from sysMalloc, and unicode2Utf8 fills it.
 *
 * Returns whatever unicode2Utf8 returns for that buffer.
 *
 * NOTE(review): the extra byte is presumably room for a terminating NUL and
 * the caller presumably owns the returned buffer -- confirm against
 * unicode2Utf8 and sysMalloc, which are defined elsewhere.
 */
char *String2Utf8(Object *string) {
    int count = getStringLen(string);
    unsigned short *chars = getStringChars(string);
    char *buf;

    /* Size the destination from the encoded length, plus one spare byte. */
    buf = (char*)sysMalloc(utf8CharLen(chars, count) + 1);

    return unicode2Utf8(chars, count, buf);
}
/*
 * call-seq: getch
 *
 * Works like StringScanner#getch but is UTF8-aware
 *
 * Reads the character that starts at the scanner's current byte position,
 * advances the position past it and returns it as a new string.
 *
 * Returns nil when the string is empty or the position is at/after its end.
 * Raises ArgumentError ("invalid utf-8 byte sequence") when utf8CharLen
 * reports a negative length.
 *
 * NOTE(review): utf8CharLen is defined outside this view; it is used here as
 * "byte length of the character at the start of the given buffer, negative
 * when invalid" -- confirm against its definition.
 */
static VALUE rb_cStringScanner_UTF8_getch(VALUE self) {
  unsigned char *str;
  long len = 0, pos = 0;
  VALUE utf8Str, curStr;
  int8_t lastCharLen = 0;

#ifndef RUBINIUS
  struct strscanner *scanner;
  GET_SCANNER(self, scanner);

  curStr = scanner->str;
  pos = scanner->curr;
#else
  curStr = rb_iv_get(self, "@string");
  pos = FIX2LONG(rb_iv_get(self, "@pos"));
#endif

  str = (unsigned char *)RSTRING_PTR(curStr);
  len = RSTRING_LEN(curStr);

  /* Nothing left to read. */
  if (len <= 0 || len <= pos) {
    return Qnil;
  }

  /*
   * Measure the character at the current position, not the one at the start
   * of the string: the bytes copied below are taken from str + pos, so the
   * length must describe that same character and must be bounded by the
   * bytes that remain after pos.
   */
  lastCharLen = utf8CharLen(str + pos, len - pos);
  if (lastCharLen < 0) {
    rb_raise(rb_eArgError, "invalid utf-8 byte sequence");
  }

  utf8Str = rb_str_new((char *)str + pos, lastCharLen);
  pos += lastCharLen;

  /* Persist the advanced position back onto the scanner. */
#ifndef RUBINIUS
  scanner->curr = pos;
#else
  rb_iv_set(self, "@pos", LONG2FIX(pos));
#endif

  /* AS_UTF8 is a project macro; presumably it tags the result as UTF-8. */
  AS_UTF8(utf8Str);
  return utf8Str;
}
/*
 * Return the value utf8CharLen computes for a string object's characters.
 *
 * The string's character array and character count are fetched with
 * getStringChars / getStringLen and handed straight to utf8CharLen.
 *
 * NOTE(review): presumably this is the number of bytes the string occupies
 * once encoded as UTF-8 -- confirm against utf8CharLen's definition.
 */
int getStringUtf8Len(Object *string) {
    unsigned short *chars = getStringChars(string);
    int count = getStringLen(string);

    return utf8CharLen(chars, count);
}