예제 #1
0
bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
{
	if (utf8.empty())
	{
		outUtf16.clear();
		return true;
	}

	bool ret = false;

	const size_t utf16Bytes = (utf8.length()+1) * sizeof(char16_t);
	char16_t* utf16 = (char16_t*)malloc(utf16Bytes);
	memset(utf16, 0, utf16Bytes);

	char* utf16ptr = reinterpret_cast<char*>(utf16);
	const UTF8* error = nullptr;

	if (llvm::ConvertUTF8toWide(sizeof(char16_t), utf8, utf16ptr, error))
	{
		outUtf16 = utf16;
		ret = true;
	}

	free(utf16);

	return ret;
}
예제 #2
0
// Decodes `len` heading symbols from `bstr` into `res`.
//
// `res` is cleared first. Each iteration asks _ltHeadings to decode one symbol
// index from the stream, maps that index through _headingSymbols (bounds
// checked by at()), and appends the resulting value as one UTF-16 code unit.
void UserDictionaryDecoder::DecodeHeading(IBitStream *bstr, unsigned len, std::u16string &res) {
    res.clear();

    unsigned symIdx;
    unsigned remaining = len;
    while (remaining != 0) {
        --remaining;

        _ltHeadings.Decode(*bstr, symIdx);
        const unsigned symbol = _headingSymbols.at(symIdx);

        // The value must fit in a single char16_t.
        assert(symbol <= 0xffff);
        res.push_back(static_cast<char16_t>(symbol));
    }
}
예제 #3
0
// Filters a single character typed into an integer field.
//
// Only '-' and the decimal digits can pass, and only at positions permitted by
// the checks below; any other input falls through every branch and is dropped
// without modifying `string` or the cursors.
// Accepted characters are forwarded to Validator::insert (defined elsewhere;
// presumably it performs the actual insertion and cursor update - confirm).
//
// string  - text being edited
// cursor  - insertion position; 0 means "before the first character"
// ecursor - second cursor value, passed through to Validator::insert and reset
//           together with `cursor` when the text is cleared
// data    - the character to insert
void LineEdit::IntValidator::insert(std::u16string &string,
                                    size_t &cursor, size_t &ecursor,
                                    char16_t data) const {
  // --- Insertion at the very beginning of the text ---
  if( cursor==0 ){
    // Nothing may be placed in front of an existing leading '-'.
    if(!string.empty() && string[0]=='-')
      return;

    // A '-' is accepted when the text does not start with '0'.
    // NOTE(review): the digit check further down also accepts '-', so a '-'
    // is inserted here even when the text does start with '0'; this guard
    // therefore has no observable effect - confirm which behaviour is intended.
    if(data=='-' && !(string.size()>=1 && string[0]=='0')){
      Validator::insert(string,cursor,ecursor,data);
      return;
      }

    // A leading '0' is accepted only into an empty text.
    // NOTE(review): the `string.size()==1 && string[0]=='-'` alternative cannot
    // be true here because a leading '-' already returned above.
    if(data=='0' && ((string.size()==1 && string[0]=='-') || string.size()==0)){
      Validator::insert(string,cursor,ecursor,data);
      return;
      }

    // Non-zero digits (and, see note above, '-') are always accepted here.
    if(('1'<=data && data<='9') || data=='-'){
      Validator::insert(string,cursor,ecursor,data);
      return;
      }
    // Anything else is rejected.
    return;
    }

  // --- Insertion after at least one character ---
  // Index of the character immediately before the cursor.
  const size_t pos = cursor-1;
  // '0' is accepted unless the preceding character is '-' or the whole text
  // is exactly "0". The bounds check skips the '-' test when the cursor lies
  // past the end of the text.
  if( data=='0'
      && !(pos<string.size() && string[pos]=='-')
      && !(string.size()==1 && string[0]=='0') ){
    Validator::insert(string,cursor,ecursor,data);
    return;
    }

  // Non-zero digit: if the text is exactly "0", discard it and move both
  // cursors to the start so the new digit replaces the lone zero.
  if('1'<=data && data<='9'){
    if(string.size()==1 && string[0]=='0'){
      string.clear();
      cursor  = 0;
      ecursor = cursor;
      }
    Validator::insert(string,cursor,ecursor,data);
    return;
    }
  // Every other character (including '-' away from position 0) is ignored.
  }
예제 #4
0
// Decodes one article from `bstr` into `res`.
//
// The target length is read as a 16-bit value; the value 0xFFFF is an escape
// meaning the real length follows as a 32-bit value. Symbols are then decoded
// with `ltArticles` and mapped through `articleSymbols` until `res` holds at
// least that many UTF-16 code units. Each symbol is one of:
//   * < 0x10000           - a literal code unit appended to `res`;
//   * 0x10000 .. 0x1003f  - a copy of (sym - 0xfffd) units, i.e. 3..66, taken
//                           from `prefix` at an offset read from the stream;
//   * >= 0x10040          - a copy of (sym - 0x1003d) units, i.e. 3 or more,
//                           taken from the already decoded part of `res` at an
//                           offset read from the stream.
//
// bstr           - bit stream positioned at the article
// res            - output; cleared before decoding
// prefix         - source text for prefix back-references
// ltArticles     - decoder producing indices into `articleSymbols`
// articleSymbols - symbol table (bounds checked via at())
//
// Always returns true. std::out_of_range propagates from at() for an invalid
// symbol index and from substr() when an offset lies past the end of its
// source string.
bool UserDictionaryDecoder::DecodeArticle(
        IBitStream *bstr,
        std::u16string &res,
        std::u16string const& prefix,
        LenTable& ltArticles,
        std::vector<char32_t>& articleSymbols)
{
    unsigned len = bstr->read(16);
    if (len == 0xFFFF) {
        len = bstr->read(32);
    }
    res.clear();
    unsigned symIdx;
    while (static_cast<unsigned>(res.length()) < len) {
        ltArticles.Decode(*bstr, symIdx);
        const unsigned sym = articleSymbols.at(symIdx);
        if (sym < 0x10000) {
            // Literal code unit.
            res += static_cast<char16_t>(sym);
        } else if (sym >= 0x10040) {
            // Back-reference into the text decoded so far; the offset width
            // is derived from the total article length.
            const unsigned startIdx = bstr->read(BitLength(len));
            const unsigned copyLen = sym - 0x1003d;
            res += res.substr(startIdx, copyLen);
        } else {
            // Back-reference into the prefix; the offset width is derived
            // from the prefix length.
            const unsigned startIdx = bstr->read(BitLength(prefix.length()));
            const unsigned copyLen = sym - 0xfffd;
            res += prefix.substr(startIdx, copyLen);
        }
    }
    return true;
}
// Writes one tokenizer token to `output` as a JSON-style array element.
//
// Consecutive Character tokens are buffered in function-local static state and
// written as a single ["Character", "..."] entry as soon as a token of any
// other type (including EndOfFile) arrives. Every emitted entry is preceded by
// the file-level `separator`, which is set to "," after each entry.
//
// token  - the token to serialize
// output - destination stream
//
// Returns false once an EndOfFile token has been seen, true otherwise.
// Because of the static buffer this function is neither reentrant nor usable
// for two token streams at the same time.
bool emit(const Token& token, std::ostream& output)
{
    static bool characterMode = false;
    static std::u16string characters;
    bool eof = false;

    // Flush the buffered character run before handling a non-character token.
    if (characterMode && token.getType() != Token::Type::Character) {
        output << separator << "[\"Character\",\"" << espaceString(characters) << "\"]";
        separator = ",";
        characterMode = false;
        characters.clear();
    }

    switch (token.getType()) {
    case Token::Type::StartTag: {
        output << separator << "[\"StartTag\",\"" << espaceString(token.getName()) << "\",{";
        separator = ",";
        // Range-for evaluates getAttributes() exactly once, so begin/end always
        // belong to the same container even if the getter returns by value,
        // and binding by const reference avoids copying each attribute.
        bool firstAttribute = true;
        for (const auto& attr : token.getAttributes()) {
            if (!firstAttribute)
                output << ',';
            firstAttribute = false;
            // NOTE(review): the attribute value is written without
            // espaceString(), unlike the name - confirm this is intentional.
            output << '"' << espaceString(attr.getName()) << "\":\"" << attr.getValue() << '"';
        }
        output << '}';
        if (token.getFlags() & Token::SelfClosing)
            output << ",true";
        output << ']';
        break;
    }
    case Token::Type::EndTag:
        output << separator << "[\"EndTag\",\"" << espaceString(token.getName()) << "\"]";
        separator = ",";
        break;
    case Token::Type::Comment:
        output << separator << "[\"Comment\",\"" << espaceString(token.getName()) << "\"]";
        separator = ",";
        break;
    case Token::Type::Doctype:
        output << separator << "[\"DOCTYPE\",\"" << espaceString(token.getName()) << "\",";
        separator = ",";
        // Missing identifiers are written as JSON null.
        if (!token.hasPublicId())
            output << "null,";
        else
            output << '"' << espaceString(token.getPublicId()) << "\",";
        if (!token.hasSystemId())
            output << "null,";
        else
            output << '"' << espaceString(token.getSystemId()) << "\",";
        // The last field is the inverse of the ForceQuirks flag.
        output << ((token.getFlags() & Token::ForceQuirks) ? "false" : "true");
        output << ']';
        break;
    case Token::Type::Character:
        // Buffer only; the run is written when the next non-character token arrives.
        characterMode = true;
        characters += token.getChar();
        break;
    case Token::Type::EndOfFile:
        eof = true;
        break;
    case Token::Type::ParseError:
    default:
        output << separator << "\"ParseError\"";
        separator = ",";
        break;
    }
    return !eof;
}
예제 #6
0
파일: utf16.cpp 프로젝트: ufal/korektor
// Re-encodes the UTF-32 string `str` into `encoded`.
// `encoded` is emptied first; each element of `str` is then passed, in order,
// to append() together with the output string.
void utf16::encode(const std::u32string& str, std::u16string& encoded) {
  encoded.clear();

  for (auto it = str.begin(); it != str.end(); ++it) {
    append(encoded, *it);
  }
}