Пример #1
0
	/**
	 * Splits the string returned by GetStr() into tokens appended to
	 * unichar_list.
	 *
	 * Every byte in front of a "\u" marker becomes its own one-character
	 * token. Each six-character "\uXXXX" escape is handed to EncodeUTF8() and
	 * the returned string becomes a single token. enc_byte is advanced by the
	 * byte length of every token pushed.
	 *
	 * @return true when the whole string was consumed; false as soon as a
	 *         byte with the high bit set (non-ASCII) is met, or when a "\u"
	 *         marker is followed by fewer than four characters. Tokens pushed
	 *         before the failure stay in unichar_list.
	 */
	bool CUniString::TokenizeAsciiEscapedUnicode()
	{
		CUString tmp = GetStr();
		while( true ) {
			const int idx = tmp.Find( "\\u" );
			const int len = tmp.GetLength();

			// Plain text runs up to the next escape, or to the end of the
			// string when no escape is left.
			const int plain_len = ( idx < 0 ) ? len : idx;
			for( int i = 0; i < plain_len; i++ ) {
				const char ch = tmp.GetAt( i );
				// Test the high bit explicitly: comparing a plain char with 0
				// never fails on platforms where char is unsigned.
				if( ( ch & 0x80 ) != 0 ) {
					return false;
				}
				char buf[2] = { ch, '\0' };
				unichar_list.PushBack( buf );
				enc_byte += 1;
			}

			if( idx < 0 ) {
				break;
			}

			// A complete escape needs six characters ("\u" plus four digits);
			// refuse to slice past the end of the string.
			if( idx + 6 > len ) {
				return false;
			}

			CUString uni = tmp.SubStr( idx, idx+6 );
			uni = EncodeUTF8( uni );
			unichar_list.PushBack( uni );
			enc_byte += uni.GetLength();
			tmp = tmp.Mid( idx+6 );
		}

		return true;
	}
Пример #2
0
	/**
	 * Converts one six-character escape of the form "\uXXXX" (two prefix
	 * characters followed by exactly four hexadecimal digits) into the UTF-8
	 * byte sequence of the code point U+XXXX.
	 *
	 * The two prefix characters are not inspected; only the length and the
	 * four digits are checked.
	 *
	 * Code points in the surrogate range U+D800..U+DFFF are encoded as-is as
	 * three bytes; surrogate pairs are not combined.
	 *
	 * @param str  the escape text; must be exactly six characters long.
	 * @return the 1-, 2- or 3-byte UTF-8 encoding. An empty string is
	 *         returned when str is not six characters long, when any of the
	 *         four digits is not hexadecimal, or when the code point is
	 *         U+0000 (its single NUL byte terminates the buffer).
	 */
	CUString EncodeUTF8( CUString &str )
	{
		CUString ret = "";
		if( str.GetLength() != 6 ) {
			return ret;
		}

		// Parse the four digits by hand: strtoul() would also accept leading
		// whitespace, a sign, a "0x" prefix or a partial digit run.
		unsigned long code = 0;
		for( int i = 2; i < 6; i++ ) {
			const char ch = str.GetAt( i );
			unsigned long digit = 0;
			if( ch >= '0' && ch <= '9' ) {
				digit = (unsigned long)( ch - '0' );
			}
			else if( ch >= 'a' && ch <= 'f' ) {
				digit = (unsigned long)( ch - 'a' ) + 10;
			}
			else if( ch >= 'A' && ch <= 'F' ) {
				digit = (unsigned long)( ch - 'A' ) + 10;
			}
			else {
				return ret;	// not a hexadecimal digit
			}
			code = ( code << 4 ) | digit;
		}

		// Zero-filled so the encoded bytes are always NUL-terminated.
		char buf[4] = { 0, 0, 0, 0 };
		if( code <= 0x7F ) {
			// 1 byte: 0xxxxxxx
			buf[0] = (char)code;
		}
		else if( code <= 0x07FF ) {
			// 2 bytes: 110xxxxx 10xxxxxx
			buf[0] = (char)( 0xC0 | ( code >> 6 ) );
			buf[1] = (char)( 0x80 | ( code & 0x3F ) );
		}
		else {
			// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
			// (four hex digits cannot exceed 0xFFFF)
			buf[0] = (char)( 0xE0 | ( code >> 12 ) );
			buf[1] = (char)( 0x80 | ( ( code >> 6 ) & 0x3F ) );
			buf[2] = (char)( 0x80 | ( code & 0x3F ) );
		}
		ret = buf;

		return ret;
	}