void wstringEx::fromUTF8(const string &s) { size_t len = utf8Len(s); clear(); if (len == 0) return; reserve(len); for (int i = 0; s[i] != 0; ) { if ((s[i] & 0xF8) == 0xF0) { push_back(((wchar_t)(s[i] & 0x07) << 18) | ((wchar_t)(s[i + 1] & 0x3F) << 12) | ((wchar_t)(s[i + 2] & 0x3F) << 6) | (wchar_t)(s[i + 3] & 0x3F)); i += 4; } else if ((s[i] & 0xF0) == 0xE0) { push_back(((wchar_t)(s[i] & 0x0F) << 12) | ((wchar_t)(s[i + 1] & 0x3F) << 6) | (wchar_t)(s[i + 2] & 0x3F)); i += 3; } else if ((s[i] & 0xE0) == 0xC0) { push_back(((wchar_t)(s[i] & 0x1F) << 6) | (wchar_t)(s[i + 1] & 0x3F)); i += 2; } else { push_back((wchar_t)s[i]); ++i; } } }
/**
 * Appends the UTF-8 encoding of the wide string @p ws to this string.
 *
 * A null or empty @p ws is a no-op. The buffer is grown via Allocate() to
 * hold the current length plus utf8Len(ws); if Allocate() reports failure the
 * string is left unchanged.
 *
 * NOTE(review): utf8() is assumed to return the number of bytes it wrote to
 * the destination it was handed, which is how Assign() uses its result.
 * Because the destination here starts at theStr[curLen], that count must be
 * added to curLen rather than replace it, otherwise the existing prefix is
 * dropped from the recorded length.
 *
 * @param ws NUL-terminated wide string to append; may be null.
 * @return   Reference to this string.
 */
bsString& bsString::Append(const wchar_t *ws)
{
	if (ws && *ws)
	{
		if (Allocate(curLen + utf8Len(ws)))
			curLen += utf8(ws, &theStr[curLen], maxLen - curLen);
	}
	return *this;
}
/**
 * Replaces the contents of this string with the UTF-8 encoding of the wide
 * string @p s.
 *
 * A null or empty @p s empties the string via Empty(). Otherwise the buffer
 * is sized with Allocate(utf8Len(s)); only if that succeeds is the text
 * converted into theStr and curLen updated from utf8()'s return value.
 *
 * @param s NUL-terminated wide string to assign; may be null.
 * @return  Reference to this string.
 */
bsString& bsString::Assign(const wchar_t *s)
{
	// Nothing to convert: just reset the string.
	if (!s || !*s)
	{
		Empty();
		return *this;
	}

	if (Allocate(utf8Len(s)))
	{
		curLen = utf8(s, theStr, maxLen);
	}

	return *this;
}
/*
 * Creates a string_class object from the given UTF-8 text.
 *
 * A T_CHAR array of utf8Len(utf8) elements is allocated first, then the
 * string object itself; if either allocation yields NULL the function
 * returns NULL. The text is converted into the array's data with
 * convertUtf8(), and the object's count and value slots are set to the
 * length and the array respectively.
 */
Object *createString(const char* utf8)
{
    int length = utf8Len(utf8);
    Object *chars = allocTypeArray(T_CHAR, length);
    Object *str;

    if(chars == NULL)
        return NULL;

    /* The object is only allocated once the backing array exists. */
    str = allocObject(string_class);
    if(str == NULL)
        return NULL;

    convertUtf8(utf8, (unsigned short *)ARRAY_DATA(chars));

    INST_DATA(str)[count_offset] = length;
    INST_DATA(str)[value_offset] = (uintptr_t)chars;

    return str;
}
/*
 * Returns a newly allocated, NUL-terminated mangled copy of the UTF-8 string.
 *
 * The input is first converted to 16-bit units with convertUtf8(), then each
 * unit is encoded as follows:
 *
 *   '_'                    -> "_1"
 *   ';'                    -> "_2"
 *   '['                    -> "_3"
 *   '/'                    -> "_"
 *   ASCII letter or digit  -> itself
 *   anything else          -> "_0xxxx" (four lower-case hex digits)
 *
 * The result is allocated with sysMalloc(); the temporary 16-bit buffer is
 * released with sysFree() before returning.
 */
char *mangleString(char *utf8) {
    int len = utf8Len(utf8);
    unsigned short *unicode = (unsigned short*) sysMalloc(len * 2);
    char *mangled, *mngldPtr;
    int i, mangledLen = 0;

    convertUtf8(utf8, unicode);

    /* Work out the length of the mangled string */
    for(i = 0; i < len; i++) {
        unsigned short c = unicode[i];

        switch(c) {
            case '_':
            case ';':
            case '[':
                mangledLen += 2;
                break;

            case '/':
                /* Written as a single '_' below, so count it as one */
                mangledLen += 1;
                break;

            default:
                /* isalnum() is only defined for unsigned char values (and
                   EOF), and is locale-dependent above 0x7f; a 16-bit unit
                   must be range-checked first.  Only ASCII alphanumerics
                   are passed through unescaped. */
                mangledLen += (c < 0x80 && isalnum(c)) ? 1 : 6;
                break;
        }
    }

    mangled = mngldPtr = (char*) sysMalloc(mangledLen + 1);

    /* Construct the mangled string */
    for(i = 0; i < len; i++) {
        unsigned short c = unicode[i];

        switch(c) {
            case '_':
                *mngldPtr++ = '_';
                *mngldPtr++ = '1';
                break;

            case ';':
                *mngldPtr++ = '_';
                *mngldPtr++ = '2';
                break;

            case '[':
                *mngldPtr++ = '_';
                *mngldPtr++ = '3';
                break;

            case '/':
                *mngldPtr++ = '_';
                break;

            default:
                /* Same ASCII-only test as in the length pass, so the two
                   passes always agree on the size of each unit */
                if(c < 0x80 && isalnum(c))
                    *mngldPtr++ = c;
                else
                    mngldPtr += sprintf(mngldPtr, "_0%04x", c);
                break;
        }
    }

    *mngldPtr = '\0';

    sysFree(unicode);
    return mangled;
}