bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16) { if (utf8.empty()) { outUtf16.clear(); return true; } bool ret = false; const size_t utf16Bytes = (utf8.length()+1) * sizeof(char16_t); char16_t* utf16 = (char16_t*)malloc(utf16Bytes); memset(utf16, 0, utf16Bytes); char* utf16ptr = reinterpret_cast<char*>(utf16); const UTF8* error = nullptr; if (llvm::ConvertUTF8toWide(sizeof(char16_t), utf8, utf16ptr, error)) { outUtf16 = utf16; ret = true; } free(utf16); return ret; }
/// Rebuilds a heading of `len` UTF-16 code units from the bit stream.
///
/// @param bstr  Bit stream positioned at the encoded heading.
/// @param len   Number of symbols to decode.
/// @param res   Output string; any previous content is discarded.
///
/// Each decoded index is looked up in `_headingSymbols` with bounds checking
/// (`at`), and the resulting symbol is asserted to fit in 16 bits.
void UserDictionaryDecoder::DecodeHeading(IBitStream *bstr, unsigned len, std::u16string &res)
{
    res.clear();

    unsigned symbolIndex;
    for (unsigned remaining = len; remaining != 0; --remaining)
    {
        // presumably Decode stores the next symbol index into symbolIndex — verify against LenTable
        _ltHeadings.Decode(*bstr, symbolIndex);

        const unsigned symbol = _headingSymbols.at(symbolIndex);
        assert(symbol <= 0xffff);
        res.push_back(static_cast<char16_t>(symbol));
    }
}
/// Filters a single typed character for an integer-only line edit and forwards
/// it to Validator::insert when it is acceptable at the cursor position.
///
/// @param string   Current text of the field (may be modified).
/// @param cursor   Insertion position; reset to 0 when a lone "0" is replaced.
/// @param ecursor  Companion cursor passed through to Validator::insert; set
///                 equal to `cursor` when a lone "0" is replaced.
/// @param data     Character the user is trying to insert.
///
/// Characters other than '-', '0' and '1'..'9' are always ignored.
void LineEdit::IntValidator::insert(std::u16string &string, size_t &cursor, size_t &ecursor, char16_t data) const
{
    const bool nonZeroDigit = (data >= '1' && data <= '9');
    const bool loneZero     = (string.size() == 1 && string[0] == '0');

    if (cursor == 0)
    {
        // Nothing may be placed in front of an existing minus sign.
        if (!string.empty() && string[0] == '-')
            return;

        // NOTE(review): minusAllowed excludes '-' in front of a leading '0',
        // but otherAllowed accepts any '-' regardless, so a minus is always
        // accepted here. Behavior kept as is — confirm whether "-0" is intended.
        const bool minusAllowed = (data == '-') && !(string.size() >= 1 && string[0] == '0');
        const bool zeroAllowed  = (data == '0')
                               && ((string.size() == 1 && string[0] == '-') || string.size() == 0);
        const bool otherAllowed = nonZeroDigit || data == '-';

        if (minusAllowed || zeroAllowed || otherAllowed)
            Validator::insert(string, cursor, ecursor, data);
        return;
    }

    const size_t previous = cursor - 1;

    if (data == '0')
    {
        // A zero is rejected directly after '-' and when the text is just "0".
        const bool afterMinus = (previous < string.size() && string[previous] == '-');
        if (!afterMinus && !loneZero)
            Validator::insert(string, cursor, ecursor, data);
        return;
    }

    if (nonZeroDigit)
    {
        // Typing 1..9 into a field holding only "0" replaces that zero.
        if (loneZero)
        {
            string.clear();
            cursor  = 0;
            ecursor = cursor;
        }
        Validator::insert(string, cursor, ecursor, data);
    }
}
bool UserDictionaryDecoder::DecodeArticle( IBitStream *bstr, std::u16string &res, std::u16string const& prefix, LenTable& ltArticles, std::vector<char32_t>& articleSymbols) { unsigned len = bstr->read(16); if (len == 0xFFFF) { len = bstr->read(32); } res.clear(); unsigned symIdx; std::vector<uint32_t> vec; while ((unsigned)res.length() < len) { ltArticles.Decode(*bstr, symIdx); unsigned sym = articleSymbols.at(symIdx); vec.push_back(sym); if (sym >= 0x10000) { if (sym >= 0x10040) { unsigned startIdx = bstr->read(BitLength(len)); unsigned len = sym - 0x1003d; res += res.substr(startIdx, len); vec.push_back(startIdx); } else { unsigned startIdx = bstr->read(BitLength(prefix.length())); unsigned len = sym - 0xfffd; res += prefix.substr(startIdx, len); vec.push_back(startIdx); } } else { res += (char16_t)sym; } } return true; }
bool emit(const Token& token, std::ostream& output) { static bool characterMode = false; static std::u16string characters; bool eof = false; if (characterMode && token.getType() != Token::Type::Character) { output << separator << "[\"Character\",\"" << espaceString(characters) << "\"]"; separator = ","; characterMode = false; characters.clear(); } switch (token.getType()) { case Token::Type::StartTag: output << separator << "[\"StartTag\",\"" << espaceString(token.getName()) << "\",{"; separator = ","; for (auto i = token.getAttributes().begin(); i != token.getAttributes().end(); ++i) { if (i != token.getAttributes().begin()) output << ','; auto attr = *i; output << '"' << espaceString(attr.getName()) << "\":\"" << attr.getValue() << '"'; } output << '}'; if (token.getFlags() & Token::SelfClosing) output << ",true"; output << ']'; break; case Token::Type::EndTag: output << separator << "[\"EndTag\",\"" << espaceString(token.getName()) << "\"]"; separator = ","; break; case Token::Type::Comment: output << separator << "[\"Comment\",\"" << espaceString(token.getName()) << "\"]"; separator = ","; break; case Token::Type::Doctype: output << separator << "[\"DOCTYPE\",\"" << espaceString(token.getName()) << "\","; separator = ","; if (!token.hasPublicId()) output << "null,"; else output << '"' << espaceString(token.getPublicId()) << "\","; if (!token.hasSystemId()) output << "null,"; else output << '"' << espaceString(token.getSystemId()) << "\","; output << ((token.getFlags() & Token::ForceQuirks) ? "false" : "true"); output << ']'; break; case Token::Type::Character: characterMode = true; characters += token.getChar(); break; case Token::Type::EndOfFile: eof = true; break; case Token::Type::ParseError: default: output << separator << "\"ParseError\""; separator = ","; break; } return !eof; }
/// Re-encodes a UTF-32 string into `encoded`.
///
/// @param str      Source code points.
/// @param encoded  Output string; cleared first, then extended by calling
///                 append() once per element of `str`, in order.
void utf16::encode(const std::u32string& str, std::u16string& encoded)
{
    encoded.clear();
    for (auto it = str.begin(); it != str.end(); ++it)
    {
        append(encoded, *it);
    }
}