コード例 #1
0
ファイル: NUnicodeParser.cpp プロジェクト: refnum/nano
//============================================================================
//		NUnicodeParser::RemoveBOM : Remove a BOM prefix.
//----------------------------------------------------------------------------
//		Asks GetBOM for the BOM range/encoding of theData, removes that range
//		from theData when it is non-empty, then asserts that the encoding
//		reported for the BOM is compatible with theEncoding.
//
//		theData      - data to update in place; it is left untouched when the
//		               range reported by GetBOM is empty.
//		theEncoding  - the UTF encoding the caller expects theData to use.
//
//		theEncoding is only consulted by assertions and logging, so a BOM that
//		does not match the expected encoding is still removed.
//----------------------------------------------------------------------------
void NUnicodeParser::RemoveBOM(NData &theData, NStringEncoding theEncoding) const
{   NStringEncoding		bomEncoding;
    NRange				theBOM;



    // Validate our parameters
    //
    // theEncoding is otherwise only read inside assertions; NN_UNUSED
    // presumably keeps builds without assertions free of unused warnings.
    NN_ASSERT(NStringEncoder::IsEncodingUTF(theEncoding));
    NN_UNUSED(theEncoding);



    // Get the state we need
    //
    // The BOM is removed before validation, so the data is stripped even
    // if the checks below would fail.
    bomEncoding = GetBOM(theData, theBOM);
    if (!theBOM.IsEmpty())
        theData.RemoveData(theBOM);



    // Validate the encoding
    //
    // Endian-specific BOMs should match the format we expected. A generic
    // UTF16/UTF32 expectation accepts a BOM of either endianness.
    switch (bomEncoding) {
    case kNStringEncodingInvalid:
        // No BOM
        break;

    case kNStringEncodingUTF8:
        NN_ASSERT(theEncoding == kNStringEncodingUTF8);
        break;

    case kNStringEncodingUTF16BE:
        NN_ASSERT(theEncoding == kNStringEncodingUTF16 || theEncoding == kNStringEncodingUTF16BE);
        break;

    case kNStringEncodingUTF16LE:
        NN_ASSERT(theEncoding == kNStringEncodingUTF16 || theEncoding == kNStringEncodingUTF16LE);
        break;

    case kNStringEncodingUTF32BE:
        NN_ASSERT(theEncoding == kNStringEncodingUTF32 || theEncoding == kNStringEncodingUTF32BE);
        break;

    case kNStringEncodingUTF32LE:
        NN_ASSERT(theEncoding == kNStringEncodingUTF32 || theEncoding == kNStringEncodingUTF32LE);
        break;

    default:
        // The unexpected value is the one returned by GetBOM, so report
        // that rather than the encoding the caller asked for.
        NN_LOG("Invalid encoding: %d", bomEncoding);
        break;
    }
}
コード例 #2
0
ファイル: NUnicodeParser.cpp プロジェクト: refnum/nano
//============================================================================
//		NUnicodeParser::AddBOM : Add a BOM prefix.
//----------------------------------------------------------------------------
//		Dispatches to the AddBOMToUTF8/16/32 helper that matches theEncoding.
//
//		theData      - data to receive the BOM; asserted not to have a BOM
//		               already (GetBOM must report kNStringEncodingInvalid).
//		theEncoding  - UTF encoding of theData. The generic UTF16/UTF32
//		               encodings request kNEndianNative; the BE/LE variants
//		               request kNEndianBig/kNEndianLittle respectively.
//
//		Any other encoding is logged and theData is left unmodified.
//----------------------------------------------------------------------------
void NUnicodeParser::AddBOM(NData &theData, NStringEncoding theEncoding) const
{   NRange		theRange;



    // Validate our parameters
    NN_ASSERT(NStringEncoder::IsEncodingUTF(theEncoding));
    NN_ASSERT(GetBOM(theData, theRange) == kNStringEncodingInvalid);

    // theRange is only referenced from the assertion above; the cast
    // presumably avoids an unused-variable warning when assertions are off.
    (void) theRange;



    // Add the BOM
    switch (theEncoding) {
    case kNStringEncodingUTF8:
        AddBOMToUTF8(theData);
        break;

    case kNStringEncodingUTF16:
        AddBOMToUTF16(theData, kNEndianNative);
        break;

    case kNStringEncodingUTF16BE:
        AddBOMToUTF16(theData, kNEndianBig);
        break;

    case kNStringEncodingUTF16LE:
        AddBOMToUTF16(theData, kNEndianLittle);
        break;

    case kNStringEncodingUTF32:
        AddBOMToUTF32(theData, kNEndianNative);
        break;

    case kNStringEncodingUTF32BE:
        AddBOMToUTF32(theData, kNEndianBig);
        break;

    case kNStringEncodingUTF32LE:
        // UTF-32 data needs the UTF-32 BOM, not the UTF-16 one.
        AddBOMToUTF32(theData, kNEndianLittle);
        break;

    default:
        NN_LOG("Invalid encoding: %d", theEncoding);
        break;
    }
}
コード例 #3
0
ファイル: NUnicodeParser.cpp プロジェクト: refnum/nano
//============================================================================
//		NUnicodeParser::Parse : Parse some data.
//----------------------------------------------------------------------------
void NUnicodeParser::Parse(const NData &theData, NStringEncoding theEncoding)
{   NRange		theRange;



    // Validate our parameters
    NN_ASSERT(NStringEncoder::IsEncodingUTF(theEncoding));



    // Set the value
    mData     = theData;
    mEncoding = theEncoding;

    (void) GetBOM(mData, theRange);



    // Identify the code points
    switch (mEncoding) {
    case kNStringEncodingUTF8:
        mCodePoints = GetCodePointsUTF8(theRange);
        break;

    case kNStringEncodingUTF16:
    case kNStringEncodingUTF16BE:
    case kNStringEncodingUTF16LE:
        mCodePoints = GetCodePointsUTF16(theRange);
        break;

    case kNStringEncodingUTF32:
    case kNStringEncodingUTF32BE:
    case kNStringEncodingUTF32LE:
        mCodePoints = GetCodePointsUTF32(theRange);
        break;

    default:
        NN_LOG("Invalid encoding: %d", theEncoding);
        break;
    }
}
コード例 #4
0
ファイル: BOMRecognizerFilter.cpp プロジェクト: chenbk85/QOR
	//------------------------------------------------------------------------------
	// Compare the bytes returned by GetBOM against every entry of the
	// Masks/Patterns tables and publish the outcome through m_RecognitionEvent.
	//
	// Nothing happens unless GetBOM supplies a non-null buffer and ulBOMBytes
	// still equals sculBOMBytes afterwards. Otherwise:
	//  - exactly one pattern matches: AcknowledgeBOM is called, the event's
	//    BOMType is set from the pattern index (0 = UTF16BE, 1 = UTF16LE,
	//    2 = UTF8), the event is raised and m_bRecognized is set;
	//  - no pattern matches: RejectBOM is called, the event is raised with
	//    eNone and m_bRecognized is set;
	//  - several patterns match: AcknowledgeBOM is called but no event is raised
	//    and m_bRecognized is left unchanged.
	//    NOTE(review): confirm that silently acknowledging an ambiguous match is
	//    the intended behaviour.
	void CBOMRecognizerFilter::RecognizeBOM()
	{
		unsigned long ulBOMBytes = sculBOMBytes;
		byte* pBOMData = GetBOM( ulBOMBytes );
		if( ulBOMBytes == sculBOMBytes && pBOMData )
		{
			unsigned int uiSequence = 0;

			// Match[ p ] stays true while pattern p agrees with every byte examined so far.
			bool Match[ CBOMRecognizerFilter::sculPatterns ];

			unsigned long ulPattern = 0;

			for( ulPattern = 0; ulPattern < CBOMRecognizerFilter::sculPatterns; ulPattern++ )
			{
				Match[ ulPattern ] = true;
			}

			while( uiSequence < ulBOMBytes )
			{
				bool bDone = true;
				for( ulPattern = 0; ulPattern < CBOMRecognizerFilter::sculPatterns; ulPattern++ )
				{
					// Skip patterns that were already ruled out, but keep scanning the
					// remaining ones: ending the loop here would leave later patterns
					// untested and still flagged as matching.
					if( !Match[ ulPattern ] )
					{
						continue;
					}

					if( ( pBOMData[ uiSequence ] & Masks[ ulPattern ][ uiSequence ] ) != Patterns[ ulPattern ][ uiSequence ] )
					{
						Match[ ulPattern ] = false;
					}
					else
					{
						bDone = false;
					}
				}
				if( bDone )
				{
					// Every candidate has been eliminated; further bytes cannot change the result.
					break;
				}
				uiSequence++;
			}

			// Count the surviving patterns and remember the index of the last one.
			unsigned int uiMatchCount = 0;
			unsigned int uiMatch = 0;
			for( ulPattern = 0; ulPattern < CBOMRecognizerFilter::sculPatterns; ulPattern++ )
			{
				if( Match[ ulPattern ] )
				{
					uiMatchCount++;
					uiMatch = static_cast< unsigned int >( ulPattern );
				}
			}

			if( uiMatchCount > 0 )
			{
				AcknowledgeBOM( ulBOMBytes );
				if( uiMatchCount == 1 )
				{
					// Map the index of the matching pattern to the reported BOM type.
					switch ( uiMatch )
					{
					case 0:
						m_RecognitionEvent.BOMType() = CBOMRecognizedEvent::eUTF16BE;
						break;
					case 1:
						m_RecognitionEvent.BOMType() = CBOMRecognizedEvent::eUTF16LE;
						break;
					case 2:
						m_RecognitionEvent.BOMType() = CBOMRecognizedEvent::eUTF8;
						break;
					default:
						// Pattern index with no known BOM type: leave BOMType as it was.
						break;
					}
					m_RecognitionEvent(); //BOM Matched
					m_bRecognized = true;
				}
			}
			else
			{
				RejectBOM( ulBOMBytes );
				m_RecognitionEvent.BOMType() = CBOMRecognizedEvent::eNone;
				m_RecognitionEvent();//No BOM
				m_bRecognized = true;
			}
		}
	}