/**
 * Decodes a D-Bus array into a wide string.
 *
 * The array found at @p iter is entered, every element is decoded as a
 * uint32_t and appended to @p str as one wchar_t. Any previous content of
 * @p str is discarded. On return @p iter has been moved to the next element.
 */
/*static*/
void WStringCodec::decode(DBusMessageIter& iter, std::wstring& str)
{
    str.clear();

    DBusMessageIter elements;
    simppl_dbus_message_iter_recurse(&iter, &elements, DBUS_TYPE_ARRAY);

    // Element count is only used as a capacity hint; how it is obtained
    // depends on the libdbus version being built against.
#if DBUS_MAJOR_VERSION == 1 && DBUS_MINOR_VERSION < 9
    const int elementCount = dbus_message_iter_get_array_len(&elements) / sizeof(uint32_t);
#else
    const int elementCount = dbus_message_iter_get_element_count(&iter);
#endif

    if (elementCount > 0)
    {
        str.reserve(elementCount);
    }

    // Loop until the sub-iterator reports argument type 0.
    // NOTE(review): presumably Codec<uint32_t>::decode advances the
    // sub-iterator after reading - confirm against its implementation.
    while (dbus_message_iter_get_arg_type(&elements) != 0)
    {
        uint32_t codeUnit;
        Codec<uint32_t>::decode(elements, codeUnit);
        str.push_back(static_cast<wchar_t>(codeUnit));
    }

    // step the outer iterator past the array that was just consumed
    dbus_message_iter_next(&iter);
}
/**
 * Reads theNumChars values with ReadShort() and stores each one, converted
 * to wchar_t, as a character of theWStr.
 *
 * Any previous content of theWStr is discarded. A zero or negative count
 * leaves theWStr empty and reads nothing.
 *
 * @param theWStr      receives the characters that were read
 * @param theNumChars  number of ReadShort() calls to perform
 */
void FileReader::ReadRawWString(std::wstring &theWStr, int theNumChars)
{
    theWStr.erase();

    // A negative count would be converted to an enormous unsigned size by
    // reserve() and throw std::length_error, even though the read loop
    // below already treats it as "nothing to read".
    if (theNumChars <= 0)
        return;

    theWStr.reserve(static_cast<std::wstring::size_type>(theNumChars));
    for (int i = 0; i < theNumChars; i++)
        theWStr += static_cast<wchar_t>(ReadShort());
}
/**
 * Reads every remaining character of wistreamIn into wstrOut.
 *
 * wstrOut is emptied first. If the stream already has its bad bit set the
 * function returns with wstrOut left empty.
 *
 * @param wstrOut      receives the characters read from the stream
 * @param wistreamIn   stream to drain; read until get() fails
 */
void load_file2(std::wstring& wstrOut, std::wistream& wistreamIn)
{
    wstrOut.erase();

    // bad() returns true if an error has occurred on the stream
    if (wistreamIn.bad())
        return;

    // in_avail() is only an estimate and may be 0 or -1; feeding a negative
    // value to reserve() would wrap to a huge size and throw
    // std::length_error, so only positive estimates are used as a hint.
    const std::streamsize available = wistreamIn.rdbuf()->in_avail();
    if (available > 0)
        wstrOut.reserve(static_cast<std::wstring::size_type>(available));

    // get() reads one character at a time; the string grows its own
    // capacity geometrically, so no manual re-reserving is needed.
    wchar_t c;
    while (wistreamIn.get(c))
        wstrOut.push_back(c);
}
/// Creates a node of kind `type` with no children.
/// `op` and `num` are stored as given; both child links are set to 0 and
/// room for 10 entries is reserved in `translist`.
explicit node(nodetype type, wchar_t op = '+', int num = 0)
{
    // Payload carried by the node.
    this->num = num;
    this->op = op;
    t = type;

    // A freshly built node is not linked to any children yet.
    r_child = 0;
    l_child = 0;

    translist.reserve(10);
}
/**
 * Returns the character codes 32 through 126 (inclusive) as one string, in
 * ascending order.
 *
 * The string is built on the first call and the same object is returned on
 * every later call. The lazy fill itself is not synchronized.
 */
const std::wstring& getAlphabet()
{
    const wchar_t kMinCharCode = 32, kMaxCharCode = 126;
    static std::wstring alphabet;
    if (alphabet.empty())
    {
        // Both ends of the range are included, so the number of characters
        // is (max - min) + 1.
        alphabet.reserve(kMaxCharCode - kMinCharCode + 1);
        for (wchar_t c = kMinCharCode; c <= kMaxCharCode; ++c)
        {
            alphabet.push_back(c);
        }
    }
    return alphabet;
}
///----------------------------------------------------------------------------- ///! @brief TODO enter a description ///! @remark ///----------------------------------------------------------------------------- void convertToUTF16String(const std::string& str, std::wstring& out) { wchar_t buffer[4096]; int numberOfWideChars = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str.c_str(), (int)str.size(), buffer, 0); if (numberOfWideChars > 0) { out.reserve(numberOfWideChars); MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str.c_str(), (int)str.size(), buffer, numberOfWideChars); buffer[numberOfWideChars] = 0; out = buffer; } else { DWORD error = GetLastError(); HRESULT hr = HRESULT_FROM_WIN32(error); MSG_TRACE_CHANNEL("String Conversion Error", "Failed to convert from UTF8 to MB with Hresult: 0x%08x, %s", hr, getLastErrorMessage(error)); } }
/**
 * Reads a length-prefixed wide string from sourceFile.
 *
 * The length prefix is obtained with extractInt() and counts 16-bit
 * characters; it is followed by two bytes per character.
 *
 * @param sourceFile  stream positioned at the length prefix
 * @param result      receives the decoded characters; always cleared first
 * @return true when the whole string was read, false when the length prefix
 *         is negative or the stream could not supply all of the bytes
 */
bool extractWstring(std::istream & sourceFile, std::wstring & result)
{
    result.clear();

    const int stringLengthPrefix = extractInt(sourceFile);

    // A negative prefix cannot describe a string; reject it instead of
    // turning it into an allocation size.
    if (stringLengthPrefix < 0)
        return false;
    if (stringLengthPrefix == 0)
        return true;

    const std::string::size_type charCount = static_cast<std::string::size_type>(stringLengthPrefix);

    // std::string owns the raw bytes so nothing leaks if a later step throws.
    std::string buff(2 * charCount, '\0');
    if (!sourceFile.read(&buff[0], static_cast<std::streamsize>(buff.size())))
        return false;   // short read: do not decode bytes that were never filled in

    result.reserve(charCount);

    // .CEL files store each 16-bit character with the high byte first
    // (big-endian), so the two bytes are combined explicitly rather than
    // relying on the host byte order.
    for (std::string::size_type i = 0; i < charCount; i++)
    {
        const unsigned char highByte = static_cast<unsigned char>(buff[2 * i]);
        const unsigned char lowByte = static_cast<unsigned char>(buff[2 * i + 1]);
        const char16_t oneWchar = static_cast<char16_t>((highByte << 8) | lowByte);
        result.append(1, static_cast<wchar_t>(oneWchar));
    }
    return true;
}
/** * Extracts a JSON String as defined by the spec - "<some chars>" * Any escaped characters are swapped out for their unescaped values * * @access protected * * @param wchar_t** data Pointer to a wchar_t* that contains the JSON text * @param std::wstring& str Reference to a std::wstring to receive the extracted string * * @return bool Returns true on success, false on failure */ bool JSON::ExtractString(const wchar_t **data, std::wstring &str) { str = L""; while (**data != 0) { // Save the char so we can change it if need be wchar_t next_char = **data; // Escaping something? if (next_char == L'\\') { // Move over the escape char (*data)++; // Deal with the escaped char switch (**data) { case L'"': next_char = L'"'; break; case L'\\': next_char = L'\\'; break; case L'/': next_char = L'/'; break; case L'b': next_char = L'\b'; break; case L'f': next_char = L'\f'; break; case L'n': next_char = L'\n'; break; case L'r': next_char = L'\r'; break; case L't': next_char = L'\t'; break; case L'u': { // We need 5 chars (4 hex + the 'u') or its not valid if (!simplejson_wcsnlen(*data, 5)) return false; // Deal with the chars next_char = 0; for (int i = 0; i < 4; i++) { // Do it first to move off the 'u' and leave us on the // final hex digit as we move on by one later on (*data)++; next_char <<= 4; // Parse the hex digit if (**data >= '0' && **data <= '9') next_char |= (**data - '0'); else if (**data >= 'A' && **data <= 'F') next_char |= (10 + (**data - 'A')); else if (**data >= 'a' && **data <= 'f') next_char |= (10 + (**data - 'a')); else { // Invalid hex digit = invalid JSON return false; } } break; } // By the spec, only the above cases are allowed default: return false; } } // End of the string? else if (next_char == L'"') { (*data)++; str.reserve(); // Remove unused capacity return true; } // Disallowed char? 
else if (next_char < L' ' && next_char != L'\t') { // SPEC Violation: Allow tabs due to real world cases return false; } // Add the next char str += next_char; // Move on (*data)++; } // If we're here, the string ended incorrectly return false; }
BOOL DownloadWebPageUnicode(std::wstring& page, HINTERNET hInternet, LPCWSTR url) { TRACEST(_T("DownloadWebPageUnicode"), CW2CT(url)); BOOL bDownloadSuccess = FALSE; if (hInternet == NULL) return FALSE; HINTERNET hFile = InternetOpenUrl(hInternet, url, NULL, 0, INTERNET_FLAG_NO_CACHE_WRITE, 0); if (hFile) { page.clear(); DWORD dwSize = 0; INT headerCP = 0;//Latin1 INT pageCP = 0;//Latin1 INT workingCP = 1252;//Latin1 if(!HttpQueryInfo(hFile, HTTP_QUERY_RAW_HEADERS_CRLF, 0, &dwSize, 0)) { if (GetLastError()==ERROR_INSUFFICIENT_BUFFER) { SetLastError(0); LPVOID lpOutBuffer = new CHAR[dwSize]; if (HttpQueryInfo(hFile, HTTP_QUERY_RAW_HEADERS_CRLF, lpOutBuffer, &dwSize, 0)) { //TRACE((LPCTSTR)lpOutBuffer); headerCP = GetWindowsCodePageW((LPCTSTR)lpOutBuffer); } delete[] lpOutBuffer; } } DWORD pageSize = 0; DWORD pageSizeLen = sizeof(pageSize); if (::HttpQueryInfo(hFile, HTTP_QUERY_CONTENT_LENGTH, &pageSize, &pageSizeLen, NULL)) { if (pageSize > 0) page.reserve(pageSize + 10); } if (page.capacity() < 1000) page.reserve(50000); const int bufferSize = 8192; const int bufferSizeU = 2 * bufferSize; CHAR bf[bufferSize + 1]; TCHAR bfU[bufferSizeU]; unsigned long nSize = 0; BOOL bReadSuccess = TRUE; BOOL bFirstTime = TRUE; while(bReadSuccess) { bReadSuccess = InternetReadFile(hFile, bf, bufferSize, &nSize); if (bReadSuccess) { if (nSize == 0) { //TRACE(_T("@3 DownloadWebPage. InternetReadFile Finished\r\n")); bDownloadSuccess = TRUE; break; } if (bFirstTime) { bFirstTime = FALSE; bf[nSize] = 0; pageCP = GetWindowsCodePageA(bf); if (headerCP != pageCP) { TRACE(_T("@3 DownloadWebPage. 
CodePage Differs (header:%d - page:%d)\r\n"), headerCP, pageCP); } if (pageCP > 0) //===Choose the Codepage detected from page if available workingCP = pageCP; else if (headerCP > 0) //===Else select the Codepage detected from headers if available workingCP = headerCP; //===Otherwise keep the original 1252 (latin 1) } if (nSize > 0) { INT bfLen = MultiByteToWideChar(workingCP, 0, bf, nSize, 0, 0); if (bfLen < bufferSizeU) { MultiByteToWideChar(workingCP, 0, bf, nSize, bfU, bfLen); page.append(bfU, bfLen); } else { TRACE(_T("@1 DownloadWebPageUnicode. Unicode buffer not enough\r\n")); bReadSuccess = FALSE; } } } else HandleInternetError(_T("DownloadWebPageUnicode. InternetReadFile")); } InternetCloseHandle(hFile); } else HandleInternetError(_T("DownloadWebPageUnicode. InternetOpenUrl")); return bDownloadSuccess; }
void TextDecoder::Append(std::wstring& str, const uint8_t* bytes, size_t length, CharacterSet charset) { switch (charset) { case CharacterSet::Unknown: case CharacterSet::ISO8859_1: case CharacterSet::ASCII: { str.append(bytes, bytes + length); break; } case CharacterSet::ISO8859_2: case CharacterSet::ISO8859_3: case CharacterSet::ISO8859_4: case CharacterSet::ISO8859_5: case CharacterSet::ISO8859_6: case CharacterSet::ISO8859_7: case CharacterSet::ISO8859_8: case CharacterSet::ISO8859_9: case CharacterSet::ISO8859_10: case CharacterSet::ISO8859_11: case CharacterSet::ISO8859_13: case CharacterSet::ISO8859_14: case CharacterSet::ISO8859_15: case CharacterSet::ISO8859_16: case CharacterSet::Cp437: case CharacterSet::Cp1250: case CharacterSet::Cp1251: case CharacterSet::Cp1252: case CharacterSet::Cp1256: { str.reserve(str.length() + length); for (size_t i = 0; i < length; ++i) { uint8_t c = bytes[i]; if (c < 128) str.push_back(c); else str.push_back(Codecs::SINGLE_BYTE_CODEPAGES[((int)charset - (int)CharacterSet::ISO8859_2) * 128 + c - 128]); } break; } case CharacterSet::Shift_JIS: { std::vector<uint16_t> buf; JPTextDecoder::AppendShiftJIS(buf, bytes, length); TextUtfEncoding::AppendUtf16(str, buf.data(), buf.size()); break; } case CharacterSet::Big5: { std::vector<uint16_t> buf; Big5TextDecoder::AppendBig5(buf, bytes, length); TextUtfEncoding::AppendUtf16(str, buf.data(), buf.size()); break; } case CharacterSet::GB2312: { std::vector<uint16_t> buf; GBTextDecoder::AppendGB2312(buf, bytes, length); TextUtfEncoding::AppendUtf16(str, buf.data(), buf.size()); break; } case CharacterSet::GB18030: { std::vector<uint16_t> buf; GBTextDecoder::AppendGB18030(buf, bytes, length); TextUtfEncoding::AppendUtf16(str, buf.data(), buf.size()); break; } case CharacterSet::EUC_JP: { std::vector<uint16_t> buf; JPTextDecoder::AppendEUCJP(buf, bytes, length); TextUtfEncoding::AppendUtf16(str, buf.data(), buf.size()); break; } case CharacterSet::EUC_KR: { std::vector<uint16_t> buf; 
KRTextDecoder::AppendEucKr(buf, bytes, length); TextUtfEncoding::AppendUtf16(str, buf.data(), buf.size()); break; break; } case CharacterSet::UnicodeBig: { str.reserve(str.length() + length / 2); for (size_t i = 0; i + 1 < length; i += 2) { str.push_back((static_cast<wchar_t>(bytes[i]) << 8) + bytes[i + 1]); } break; } case CharacterSet::UTF8: { TextUtfEncoding::AppendUtf8(str, bytes, length); break; } default: break; } }