예제 #1
0
파일: sceCcc.cpp 프로젝트: ANR2ME/ppsspp
// sceCccStrlenUTF16: reports the length of the UTF-16 string located at strAddr.
//
// strAddr: address used to build the const wide-character pointer.
// Returns: the value of UTF16LE(str).length(), or 0 (after logging an error)
//          when the pointer built from strAddr fails IsValid().
int sceCccStrlenUTF16(u32 strAddr)
{
	const auto str = PSPConstWCharPointer::Create(strAddr);
	if (!str.IsValid())
	{
		ERROR_LOG(HLE, "sceCccStrlenUTF16(%08x): invalid pointer", strAddr);
		return 0;
	}
	// Success path: only trace the call.  The message used to repeat the
	// "invalid pointer" text of the error branch, which made every valid
	// call look like a failure in the debug log.
	DEBUG_LOG(HLE, "sceCccStrlenUTF16(%08x)", strAddr);
	return UTF16LE(str).length();
}
예제 #2
0
파일: sceCcc.cpp 프로젝트: BogsyP/ppsspp
// sceCccStrlenUTF16: reports the length of the UTF-16 string located at strAddr.
//
// strAddr: address assigned to the wide-character pointer.
// Returns: the value of UTF16LE(str).length(), or 0 (after logging an error)
//          when the pointer fails IsValid().
int sceCccStrlenUTF16(u32 strAddr)
{
	PSPWCharPointer str;
	str = strAddr;

	if (!str.IsValid())
	{
		ERROR_LOG(HLE, "sceCccStrlenUTF16(%08x): invalid pointer", strAddr);
		return 0;
	}
	// Success path: only trace the call.  The message used to repeat the
	// "invalid pointer" text of the error branch, which made every valid
	// call look like a failure in the debug log.
	DEBUG_LOG(HLE, "sceCccStrlenUTF16(%08x)", strAddr);
	return UTF16LE(str).length();
}
// Translates a Palm OS character-encoding constant (charEncoding*) into the
// corresponding TextEncoding.
//
// encoding: one of the charEncoding* values handled by the switch below.
// Returns:  the TextEncoding chosen for that value.
// Errors:   values with no counterpart (and any unrecognised value) are passed
//           to ThrowError(kTextUnsupportedEncodingErr).  The trailing return is
//           marked "not reached", so ThrowError is presumably non-returning.
TextEncoding TextEncoding::Palm(UInt8 encoding)
{
    // Shorthand: the given base with the default variant and default format.
    const auto makeDefault = [](auto base) {
        return TextEncoding(base, kTextEncodingDefaultVariant, kTextEncodingDefaultFormat);
    };
    // Shorthand: the given base and explicit variant with the default format.
    const auto makeVariant = [](auto base, auto variant) {
        return TextEncoding(base, variant, kTextEncodingDefaultFormat);
    };

    switch (encoding) {
    // ---- Palm OS Latin encoding and its subsets ----

    // Palm's CP1252 flavour extended with 10 additional Greek characters.
    case charEncodingPalmGSM:
        ThrowError(kTextUnsupportedEncodingErr);
    // Palm's own CP1252 flavour.
    case charEncodingPalmLatin:
        return PalmLatin1();
    // The Windows take on ISO 8859-1.
    case charEncodingCP1252:
        return WindowsLatin1();
    // ISO 8859-1 proper.
    case charEncodingISO8859_1:
        return ISOLatin1();
    // ISO 646-1991.
    case charEncodingAscii:
        return ASCII();

    // ---- Palm OS Japanese encoding and its subsets ----

    // Palm's own CP932 flavour.
    case charEncodingPalmSJIS:
        return PalmShiftJIS();
    // The Windows take on Shift-JIS.
    case charEncodingCP932:
        return WindowsShiftJIS();
    // JIS 0208-1990 plus single-byte katakana.
    case charEncodingShiftJIS:
        return ShiftJIS();

    // ---- Unicode ----

    // UCS2 is UTF16 without surrogate pairs.
    case charEncodingUCS2:
        return UCS2();
    case charEncodingUTF8:
        return UTF8();
    case charEncodingUTF7:
        return UTF7();
    case charEncodingUTF16:
        return UTF16();
    case charEncodingUTF16BE:
        return UTF16BE();
    case charEncodingUTF16LE:
        return UTF16LE();
    case charEncodingUTF32:
        return UTF32();
    case charEncodingUTF32BE:
        return UTF32BE();
    case charEncodingUTF32LE:
        return UTF32LE();
    // The UTF7 dialect IMAP uses for mail folder names (RFC 2060).  The
    // Macintosh text encoding converter does not appear to support it.
    case charEncodingUTF7_IMAP:
        ThrowError(kTextUnsupportedEncodingErr);
    // Unclear how this would differ from UTF32, so it is treated the same.
    case charEncodingUCS4:
        return UTF32();

    // ---- Latin ----
    case charEncodingCP850:
        return makeDefault(kTextEncodingDOSLatin1);
    case charEncodingCP437:
        return makeDefault(kTextEncodingDOSLatinUS);
    case charEncodingCP865:
        return makeDefault(kTextEncodingDOSNordic);
    case charEncodingCP860:
        return makeDefault(kTextEncodingDOSPortuguese);
    case charEncodingCP861:
        return makeDefault(kTextEncodingDOSIcelandic);
    case charEncodingCP863:
        return makeDefault(kTextEncodingDOSCanadianFrench);
    case charEncodingCP775:
        return makeDefault(kTextEncodingDOSBalticRim);
    case charEncodingMacIslande:
        return makeVariant(kTextEncodingMacIcelandic, kMacIcelandicStdDefaultVariant);
    case charEncodingMacintosh:
        return makeVariant(kTextEncodingMacRoman, kMacRomanDefaultVariant);
    case charEncodingCP1257:
        return makeDefault(kTextEncodingWindowsBalticRim);
    case charEncodingISO8859_3:
        return makeDefault(kTextEncodingISOLatin3);
    case charEncodingISO8859_4:
        return makeDefault(kTextEncodingISOLatin4);

    // ---- Extended Latin ----
    case charEncodingISO8859_2:
        return makeDefault(kTextEncodingISOLatin2);
    case charEncodingCP1250:
        return makeDefault(kTextEncodingWindowsLatin2);
    case charEncodingCP852:
        return makeDefault(kTextEncodingDOSLatin2);
    // Apparently not implemented on the Mac; it is described at
    // http://www.kiv.zcu.cz/~luki/cs/cs-encodings-faq.html
    case charEncodingXKamenicky:
        ThrowError(kTextUnsupportedEncodingErr);
    case charEncodingMacXCroate:
        return makeVariant(kTextEncodingMacCroatian, kMacCroatianDefaultVariant);
    // Could not determine which encoding is meant.  The name suggests
    // "Mac OS X Latin-2", but it is not known what that would be.
    case charEncodingMacXLat2:
        ThrowError(kTextUnsupportedEncodingErr);
    case charEncodingMacXRomania:
        return makeVariant(kTextEncodingMacRomanian, kMacRomanianDefaultVariant);
    // Most likely the character set of European GSM phones.
    // http://www.csoft.co.uk/character_sets/gsm.htm
    case charEncodingGSM:
        ThrowError(kTextUnsupportedEncodingErr);

    // ---- Japanese ----
    case charEncodingEucJp:
        return makeDefault(kTextEncodingEUC_JP);
    case charEncodingISO2022Jp:
        return makeDefault(kTextEncodingISO_2022_JP);
    // More of a detection heuristic than an actual encoding.
    case charEncodingXAutoJp:
        ThrowError(kTextUnsupportedEncodingErr);

    // ---- Greek ----
    case charEncodingISO8859_7:
        return makeDefault(kTextEncodingISOLatinGreek);
    case charEncodingCP1253:
        return makeDefault(kTextEncodingWindowsGreek);
    case charEncodingCP869:
        return makeDefault(kTextEncodingDOSGreek2);
    case charEncodingCP737:
        return makeDefault(kTextEncodingDOSGreek);
    case charEncodingMacXGr:
        return makeVariant(kTextEncodingMacGreek, kMacGreekDefaultVariant);

    // ---- Cyrillic ----
    case charEncodingCP1251:
        return makeDefault(kTextEncodingWindowsCyrillic);
    case charEncodingISO8859_5:
        return makeDefault(kTextEncodingISOLatinCyrillic);
    case charEncodingKoi8R:
        return makeDefault(kTextEncodingKOI8_R);
    // A guess by elimination: taken to mean the Ukrainian KOI8 of RFC 2319.
    case charEncodingKoi8:
        return makeDefault(kTextEncodingKOI8_U);
    case charEncodingCP855:
        return makeDefault(kTextEncodingDOSCyrillic);
    case charEncodingCP866:
        return makeDefault(kTextEncodingDOSRussian);
    case charEncodingMacCyr:
        return makeDefault(kTextEncodingMacCyrillic);
    case charEncodingMacUkraine:
        return makeDefault(kTextEncodingMacUkrainian);

    // ---- Turkish ----
    case charEncodingCP1254:
        return makeDefault(kTextEncodingWindowsLatin5);
    case charEncodingISO8859_9:
        return makeDefault(kTextEncodingISOLatin5);
    case charEncodingCP857:
        return makeDefault(kTextEncodingDOSTurkish);
    case charEncodingMacTurc:
        return makeDefault(kTextEncodingMacTurkish);
    // Looks like DOS Latin 3, which the Mac tables do not list.
    case charEncodingCP853:
        ThrowError(kTextUnsupportedEncodingErr);

    // ---- Arabic ----
    // ISO 8859-6, Asmo 708 and DOS 708 are not distinguished by the Mac
    // tables, so they are all merged into one encoding here.
    case charEncodingISO8859_6:
    case charEncodingAsmo708:
    case charEncodingAsmo708Plus:
    case charEncodingAsmo708Fr:
        return makeDefault(kTextEncodingISOLatinArabic);
    case charEncodingCP1256:
        return makeDefault(kTextEncodingWindowsArabic);
    case charEncodingCP864:
        return makeDefault(kTextEncodingDOSArabic);
    case charEncodingMacAra:
        return makeDefault(kTextEncodingMacArabic);

    // ---- Simplified Chinese ----
    case charEncodingGB2312:
        return makeDefault(kTextEncodingGB_2312_80);
    case charEncodingHZ:
        return makeDefault(kTextEncodingHZ_GB_2312);
    case charEncodingGBK:
        return makeDefault(kTextEncodingGBK_95);
    // Assumed to be a Palm variation on GB 2312.
    case charEncodingPalmGB:
        return makeDefault(kTextEncodingGB_2312_80);

    // ---- Traditional Chinese ----
    case charEncodingBig5:
        return makeVariant(kTextEncodingBig5, kBig5_BasicVariant);
    case charEncodingBig5_HKSCS:
        return makeDefault(kTextEncodingBig5_HKSCS_1999);
    // A guess: Palm's "Plus" is taken to be the ETenVariant, which Apple
    // describes as "adds kana, Cyrillic, radicals, etc with hi bytes C6-C8,F9".
    case charEncodingBig5Plus:
        return makeVariant(kTextEncodingBig5, kBig5_ETenVariant);
    // Palm's changes are unknown, so plain Big-5 is used.
    case charEncodingPalmBig5:
        return makeVariant(kTextEncodingBig5, kBig5_BasicVariant);
    case charEncodingISO2022CN:
        return makeDefault(kTextEncodingISO_2022_CN);

    // ---- Vietnamese ----
    case charEncodingViscii:
        return makeDefault(kTextEncodingVISCII);
    // VIQR (VIetnamese Quoted-Readable) is a convention for writing
    // Vietnamese letters in 7-bit ASCII.
    // http://www.nonsong.org/viqr.html
    case charEncodingViqr:
        ThrowError(kTextUnsupportedEncodingErr);
    // Also called VPS.
    // http://homepages.cwi.nl/~dik/english/codes/stand.html#vncii
    case charEncodingVncii:
        ThrowError(kTextUnsupportedEncodingErr);
    // Several web sites treat Vietnet as identical to VIQR.
    case charEncodingVietnet:
        ThrowError(kTextUnsupportedEncodingErr);
    case charEncodingCP1258:
        return makeDefault(kTextEncodingWindowsVietnamese);

    // ---- Korean ----
    case charEncodingEucKr:
        return makeDefault(kTextEncodingEUC_KR);
    case charEncodingCP949:
        return makeDefault(kTextEncodingDOSKorean);
    case charEncodingISO2022Kr:
        return makeDefault(kTextEncodingISO_2022_KR);

    // ---- Hebrew ----
    case charEncodingISO8859_8I:
        return makeVariant(kTextEncodingISOLatinHebrew, kISOLatinHebrewImplicitOrderVariant);
    // According to the comments in the Mac TextCommon.h, the "default"
    // variant of ISO 8859-8 means visual ordering.
    case charEncodingISO8859_8:
        return makeVariant(kTextEncodingISOLatinHebrew, kISOLatinHebrewVisualOrderVariant);
    case charEncodingCP1255:
        return makeDefault(kTextEncodingWindowsHebrew);
    // cp1255 has no explicit visual variant; falling back to the default
    // variant is hoped to be better than raising an error.
    case charEncodingCP1255V:
        return makeDefault(kTextEncodingWindowsHebrew);

    // ---- Thai ----
    // TIS-620, http://www.langbox.com/codeset/tis620.html
    case charEncodingTis620:
        ThrowError(kTextUnsupportedEncodingErr);
    case charEncodingCP874:
        return makeDefault(kTextEncodingDOSThai);

    default:
        ThrowError(kTextUnsupportedEncodingErr);
    }

    // not reached
    return TextEncoding();
}