void CPLJSonStreamingParser::DecodeUnicode()
{
    constexpr char szReplacementUTF8[] = "\xEF\xBF\xBD";
    unsigned nUCSChar;
    if( m_osUnicodeHex.size() == 8 )
    {
        unsigned nUCSHigh = getUCSChar(m_osUnicodeHex);
        assert( IsHighSurrogate(nUCSHigh) );
        unsigned nUCSLow = getUCSChar(m_osUnicodeHex.substr(4));
        if( IsLowSurrogate(nUCSLow) )
        {
            nUCSChar = GetSurrogatePair(nUCSHigh, nUCSLow);
        }
        else
        {
            /* Invalid code point. Insert the replacement char */
            nUCSChar = 0xFFFFFFFFU;
        }
    }
    else
    {
        assert( m_osUnicodeHex.size() == 4 );
        nUCSChar = getUCSChar(m_osUnicodeHex);
    }

    if( nUCSChar < 0x80)
    {
        m_osToken += static_cast<char>(nUCSChar);
    }
    else if( nUCSChar < 0x800)
    {
        m_osToken += static_cast<char>(0xC0 | (nUCSChar >> 6));
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
    }
예제 #2
0
// Stores an error code and an optional message on this object.
//
//   ec  - error code, saved in m_ec.
//   wsz - message text; copied into m_wszError and truncated to fit the
//         buffer. Passing NULL stores an empty message instead.
void LOCATOR::SetError(EC ec, const wchar_t *wsz)
{
    m_ec = ec;

    // No text supplied: leave an empty message and finish.
    if (wsz == NULL) {
        m_wszError[0] = L'\0';
        return;
    }

    // _TRUNCATE makes the copy cut the text short rather than fail when it
    // does not fit in the destination buffer.
    wcsncpy_s(m_wszError, _countof(m_wszError), wsz, _TRUNCATE);

    // A high surrogate sitting in slot cbErrMax-2 is overwritten with a
    // terminator. Presumably this is the last character slot before the
    // terminating null, so truncation cannot leave half of a surrogate
    // pair at the end of the message -- confirm that cbErrMax equals the
    // element count of m_wszError.
    if (IsHighSurrogate(m_wszError[cbErrMax-2])) {
        m_wszError[cbErrMax-2] = L'\0';
    }
}
예제 #3
0
/*
 * Decodes one character from an input stream through the MLang charset
 * converter stored in in->mlang.
 *
 *   firstByte - first byte of the character, already fetched by the caller.
 *   in        - input stream; in->source supplies any further bytes needed.
 *   bytesRead - receives the value the converter left in its in/out
 *               source-size argument whenever the function returns from
 *               inside the loop. It is NOT written when the function gives
 *               up after filling the whole input buffer.
 *
 * Returns the decoded character (surrogate pairs are combined into a single
 * value), or EndOfStream when the source runs dry or no character could be
 * produced from TC_INBUFSIZE bytes.
 */
int TY_(Win32MLangGetChar)(byte firstByte, StreamIn * in, uint * bytesRead)
{
    IMLangConvertCharset * converter;
    TidyInputSource * src;
    CHAR pending[TC_INBUFSIZE] = { 0 };
    WCHAR decoded[TC_OUTBUFSIZE] = { 0 };
    HRESULT hr = S_OK;
    size_t pendingLen = 0;

    assert( in != NULL );
    assert( &in->source != NULL );
    assert( bytesRead != NULL );
    assert( in->mlang != 0 );

    converter = (IMLangConvertCharset *)in->mlang;
    src = &in->source;

    /* Seed the buffer with the byte the caller has already consumed. */
    pending[pendingLen++] = (CHAR)firstByte;

    /* Retry the conversion, adding one byte per pass, until it yields output. */
    while (pendingLen < TC_INBUFSIZE)
    {
        UINT decodedLen = TC_OUTBUFSIZE;
        UINT consumed = pendingLen;
        int fetched = EndOfStream;

        hr = IMLangConvertCharset_DoConversionToUnicode(converter, pending, &consumed, decoded, &decodedLen);

        /* Both checks are debug-only; release builds carry on regardless. */
        assert( hr == S_OK );
        assert( decodedLen <= 2 );

        switch (decodedLen)
        {
        case 2:
            {
                /* U+10000-U+10FFFF come back as a pair of surrogates. */
                /* NOTE(review): the second unit is checked as the "high"   */
                /* surrogate and the first as the "low" one; that ordering  */
                /* is kept as-is -- confirm against the helper definitions. */
                tchar firstUnit = (tchar)decoded[0];
                tchar secondUnit = (tchar)decoded[1];
                assert( IsHighSurrogate(secondUnit) && IsLowSurrogate(firstUnit) );
                *bytesRead = consumed;
                return (int)CombineSurrogatePair(secondUnit, firstUnit);
            }

        case 1:
            /* A single code unit is a complete character. */
            *bytesRead = consumed;
            return (int)decoded[0];

        default:
            /* Nothing decoded yet: fall out of the switch and read more. */
            break;
        }

        fetched = src->getByte(src->sourceData);

        if (fetched == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = consumed;
            return EndOfStream;
        }

        pending[pendingLen++] = (CHAR)fetched;
    }

    /* TC_INBUFSIZE bytes produced no complete character: treat the */
    /* stream as unreadable and stop.                               */

    /* todo: error message for broken stream? */
    return EndOfStream;
}
예제 #4
0
// Find the line break.
//
// Appends chunks of text to this line starting at aContext.iInfo.iEndChar,
// chooses where the line ends, and records the result in iCountedEndChar,
// iHangingEndChar, iEndChar, iNextLineStartChar, iAtParEnd and (when the line
// is truncated with an ellipsis) iEllipsis. iEllipsisFormat and iEllipsisWidth
// are copied from the first AppendChunksL() call. If more text was appended
// than ends up on the line, the surplus chunks are removed and the tail of the
// line is rebuilt so that a chunk boundary falls at the end of the line.
//
// aContext     - formatting context; read for the source text and parameters,
//                and its iLineEndsInForcedLineBreak flag is written.
// aStartXCoord - x coordinate at which the line's first chunk starts.
// aWrapXCoord  - x coordinate at which the line should wrap.
//
// Calls TmPanic(EBadLineBreak) if the positions returned by the source's
// GetLineBreakL() are inconsistent.
// NOTE(review): the trailing 'L' suggests this function may leave -- confirm.
void CTmLine::BreakL(CTmFormatContext& aContext,TInt aStartXCoord,TInt aWrapXCoord)
	{
	aContext.iLineEndsInForcedLineBreak = EFalse;
	// Truncation normally happens at the wrap coordinate; when truncating with an
	// ellipsis it happens at the wrap width less the right margin instead.
	TInt truncate_x_coord = aWrapXCoord;
	TBool truncating_with_ellipsis = aContext.iParam.IsTruncatingWithEllipsis(); // ####################
	if (truncating_with_ellipsis)
		truncate_x_coord = aContext.iParam.iWrapWidth - aContext.iRightMargin;

	// The following character positions are all relative to the document, not the line.
	TInt start_pos = aContext.iInfo.iEndChar;	// line start
	TInt truncate_pos_with_ellipsis = KMaxTInt;	// position of truncation, allowing room for an ellipsis
	TInt truncate_pos = KMaxTInt;				// position of truncation
	TInt wrap_pos = KMaxTInt;					// position at the full measure
	TInt break_pos = KMaxTInt;					// legal break position

	CTmLine::TEllipsisInfo ellipsisInfo;		// structure used to carry info back from AppendChunk(s)L
	
	// Fill the line up to the truncation width.
	// cur_pos and cur_x_coord are advanced by each AppendChunk(s)L call below.
	TInt cur_pos = start_pos;
	TInt cur_x_coord = aStartXCoord;

	// Upper limit passed to AppendChunksL: one past the document length.
	TInt maxChar = aContext.iDocumentLength + 1;
	
	AppendChunksL(aContext, cur_pos, cur_x_coord, maxChar, truncate_x_coord, truncating_with_ellipsis, ellipsisInfo);
	iEllipsisFormat = ellipsisInfo.iEllipsisFormat;
	iEllipsisWidth = ellipsisInfo.iEllipsisWidth;
	truncate_pos_with_ellipsis = cur_pos;
	// When truncating with an ellipsis, append again with the ellipsis flag off
	// to find the truncation position that does not allow room for an ellipsis.
	if (truncating_with_ellipsis)
		{
		AppendChunksL(aContext, cur_pos, cur_x_coord, maxChar, truncate_x_coord, EFalse, ellipsisInfo);
		}
	truncate_pos = cur_pos;

	// Fill the line up to the wrap width if it is greater than the truncation width.
	if (aWrapXCoord > truncate_x_coord)
		{
		AppendChunksL(aContext, cur_pos, cur_x_coord, maxChar, aWrapXCoord, EFalse, ellipsisInfo);
		}
	wrap_pos = cur_pos;

	// If the line has not been ended by a paragraph break or forced line end find a possible break.
	TBool line_break_found = FALSE;
	if (ellipsisInfo.iAtLineEnd || ellipsisInfo.iAtParEnd)
		{
		// The appended text already ended the line: break at the wrap position.
		break_pos = wrap_pos;
		line_break_found = TRUE;
		aContext.iLineEndsInForcedLineBreak = ETrue;
		}
	// The first pass of the loop below searches backwards; later passes search forwards.
	TBool get_line_break_forwards = FALSE;
	// A break must leave at least one character on the line.
	TInt min_break_pos = start_pos + 1;
	TInt hanging_break_pos = break_pos;
	TInt break_pos_after_spaces = break_pos;
	while (!line_break_found)
		{
		TInt max_break_pos = cur_pos;
		// Extra text is appended beyond max_break_pos as line-breaking context,
		// limited to one past the end of the document.
		TInt context_end_pos = Min(aContext.iDocumentLength + 1,
			max_break_pos + MTmSource::ELineBreakContext);
		
		// If the text run returned for cur_pos is longer than one character and ends
		// in a high surrogate, pull context_end_pos back by one (provided it is still
		// beyond cur_pos).
		// NOTE(review): the test looks at the last character of the run returned by
		// GetText(), not at the character at context_end_pos itself -- confirm that
		// this matches the intent of not ending the context on a high surrogate.
		if ( cur_pos <= aContext.iDocumentLength )
		    {
            TTmCharFormat new_format;
            TPtrC text;
            aContext.iSource.GetText( cur_pos, text, new_format );
            if ( context_end_pos > cur_pos && text.Length() > 1 &&
                    IsHighSurrogate( text[ text.Length() - 1 ] ) )
                {
                --context_end_pos;
                }
		    }
		
		// Append line breaking context.
		AppendChunksL(aContext, cur_pos, cur_x_coord, context_end_pos, KMaxTInt, EFalse, ellipsisInfo);
		TInt chars_appended = cur_pos - max_break_pos;
		TPtrC p(iTextWithoutChunkOverlaps.Ptr(),iTextWithoutChunkOverlaps.Length());
		// The minimum and maximum break positions are passed relative to the line
		// start; the positions written back are converted to document positions below.
		if (aContext.iSource.GetLineBreakL(p,start_pos,min_break_pos - start_pos,max_break_pos - start_pos,
										   get_line_break_forwards,break_pos,hanging_break_pos,break_pos_after_spaces))	
			{
			// Sanity-check the returned positions before and after converting them.
			if (hanging_break_pos > iTextWithoutChunkOverlaps.Length() || break_pos_after_spaces > iTextWithoutChunkOverlaps.Length())
				TmPanic(EBadLineBreak);
			break_pos += start_pos;
			hanging_break_pos += start_pos;
			break_pos_after_spaces += start_pos;
			if (break_pos < min_break_pos || break_pos > max_break_pos)
				TmPanic(EBadLineBreak);
			if (hanging_break_pos < break_pos || break_pos_after_spaces < hanging_break_pos)
				TmPanic(EBadLineBreak);
			line_break_found = TRUE;
			}
		// If no break has been found and no more characters could be added, break here.
		else if (chars_appended == 0)
			{
			break_pos = hanging_break_pos = break_pos_after_spaces = max_break_pos;
			line_break_found = TRUE;
			}

		/*
		If no line break has been found, and if we prefer to break at an illegal position rather
		than exceeding the measure, break at the wrap position (wrap_pos) but make sure the line
		contains at least one character (min_break_pos).
		*/
		if (!line_break_found && !aContext.iParam.LegalLineBreaksOnly())
			{
			break_pos = hanging_break_pos = break_pos_after_spaces = Max(wrap_pos,min_break_pos);
			line_break_found = TRUE;
			}

		/*
		After the first iteration, switch to searching forwards for a line break, because we know that the
		text is now too wide and we want the shortest possible overlength line.
		*/
		get_line_break_forwards = TRUE;
		min_break_pos = max_break_pos;
		}

	// If the chosen break lies beyond the truncation position while truncating with
	// an ellipsis, end the line where room was left for the ellipsis and record the
	// ellipsis; otherwise use the three break positions found above.
	if (truncating_with_ellipsis && break_pos > truncate_pos)
		{
		iCountedEndChar = iHangingEndChar = iEndChar = truncate_pos_with_ellipsis;
		iEllipsis = aContext.iParam.iEllipsis;
		}
	else
		{
		iCountedEndChar = break_pos;
		iHangingEndChar = hanging_break_pos;
		iEndChar = break_pos_after_spaces;
		}

	// Determine whether the line ends at the end of the paragraph.
	iAtParEnd = break_pos_after_spaces == ellipsisInfo.iParEndPos;
	if (iAtParEnd)
		aContext.iLineEndsInForcedLineBreak = ETrue;

	// The next line starts at break_pos_after_spaces, even when this line was
	// truncated with an ellipsis.
	iNextLineStartChar = break_pos_after_spaces;

	/*
	Truncate the text after the displayed characters, ensuring there is a break of chunk at this point.
	This makes it easier to justify the line and write the bytecode.
	*/
	if (iCountedEndChar < cur_pos)
		{
		int chunk_start_pos = start_pos;
		int chunk_end_pos = start_pos;
		int chunk_index = 0;
		// Recompute the x coordinate from the line start by summing the widths of
		// the chunks that are kept.
		cur_x_coord = aStartXCoord;
		// find the chunk that contains the end of the text,(iCountedEndChar)
		while (chunk_index < iChunk.Length())
			{
			// Overlapping characters are subtracted so they are not counted twice.
			chunk_end_pos += (iChunk[chunk_index].iTextLength - iChunk[chunk_index].iOverlappingChars);
			if (chunk_end_pos > iCountedEndChar)
				break;
			cur_x_coord += iChunk[chunk_index].iWidth;
			chunk_start_pos = chunk_end_pos;
			chunk_index++;
			}
		//  remove the chunk that contains the last bit of text and any after
		iChunk.Truncate(chunk_index);
		iRunInfo.Truncate(chunk_index);
		iText.Truncate(chunk_start_pos - start_pos);
		cur_pos = chunk_start_pos;

		// Re-append chunks so there is just enough text to reach iCountedEndChar,
		// then continue up to iHangingEndChar. Each loop stops early if
		// AppendChunkL() returns false.
		while (cur_pos < iCountedEndChar)
			{
			if (!AppendChunkL(aContext,cur_pos,cur_x_coord,iCountedEndChar,KMaxTInt,FALSE,ellipsisInfo))
				break;
			}
		while (cur_pos < iHangingEndChar)
			{
			if (!AppendChunkL(aContext,cur_pos,cur_x_coord,iHangingEndChar,KMaxTInt,FALSE,ellipsisInfo))
				break;
			}
			
		// If the last chunk is a soft hyphen restore its full width.		
		if (iChunk.Length())
			{
			TTmChunk& last_chunk = iChunk[iChunk.Length() - 1];
			if (last_chunk.iType == TTmChunk::ESoftHyphenChunk)
				last_chunk.iWidth = last_chunk.iStandardWidth;
			}

		}
	}