Exemple #1
0
/*
 * Strip punctuation from both ends of a NUL-terminated wide string, in place.
 *
 * Characters are classified with IsPunctuation(). A NULL pointer or an empty
 * string is left untouched. Punctuation in the middle of the string is kept.
 */
static
VOID
TrimPunctuation(
    _Inout_ PWSTR pStr)
{
    SIZE_T Remaining;
    UINT Skip;

    if (!pStr)
        return;

    Remaining = wcslen(pStr);
    if (Remaining == 0)
        return;

    /* Count the punctuation characters at the front of the string */
    for (Skip = 0; Skip < Remaining; Skip++)
    {
        if (!IsPunctuation(pStr[Skip]))
            break;
    }

    /* Slide the rest of the string (terminator included) down to the start */
    if (Skip != 0)
    {
        Remaining -= Skip;
        memmove(pStr, &pStr[Skip], (Remaining + 1) * sizeof(WCHAR));
    }

    /* Walk backwards, replacing each trailing punctuation with a terminator */
    for (; Remaining != 0 && IsPunctuation(pStr[Remaining - 1]); Remaining--)
    {
        pStr[Remaining - 1] = L'\0';
    }
}
//-----------------------------------------------------------------------------
// Update scan codes for foreign keyboards
//-----------------------------------------------------------------------------
void ButtonCode_UpdateScanCodeLayout( )
{
	// reset the keyboard
	memcpy( s_pScanToButtonCode, s_pScanToButtonCode_QWERTY, sizeof(s_pScanToButtonCode) );

#if !defined( _X360 )
	// fix up keyboard layout for other languages
	HKL currentKb = ::GetKeyboardLayout( 0 );
	HKL englishKb = ::LoadKeyboardLayout("00000409", 0);

	if (englishKb && englishKb != currentKb)
	{
		for ( int i = 0; i < ARRAYSIZE(s_pScanToButtonCode); i++ )
		{
			// take the english/QWERTY
			ButtonCode_t code = s_pScanToButtonCode_QWERTY[ i ];

			// only remap printable keys
			if ( code != KEY_NONE && code != KEY_BACKQUOTE && ( IsAlphaNumeric( code ) || IsPunctuation( code ) ) )
			{
				// get it's virtual key based on the old layout
				int vk = ::MapVirtualKeyEx( i, 1, englishKb );

				// turn in into a scancode on the new layout
				int newScanCode = ::MapVirtualKeyEx( vk, 0, currentKb );

				// strip off any high bits
				newScanCode &= 0x0000007F;

				// set in the new layout
				s_pScanToButtonCode[newScanCode] = code;
			}
		}
	}

	s_pScanToButtonCode[0] = KEY_NONE;
#endif
}
//------------------------------------------------------------------------------
// Collect an unquoted word into `token`, starting with curChar.
//
// Characters are appended until a NUL, a whitespace character or a punctuation
// character is reached; underscores are stored as spaces. Unless atEOL is set,
// the character that ended the word is copied into putBackChar (putBuffer when
// building with __MWERKS__) and filecol is stepped back by one.
// Always returns true.
bool Tokeniser::ParseToken ()
{
	token = "";

	for (;;)
	{
		if (curChar == '\0')
			break;
		if (IsWhiteSpace(curChar) || IsPunctuation(curChar))
			break;

		if (curChar != '_')
			token += curChar;
		else
			token += ' ';

		curChar = GetNextChar ();
	}

	if (atEOL)
		return true;

	// remember the terminating character (the stream's own putback is not used here)
#ifdef __MWERKS__
	putBuffer = curChar;
#else
	putBackChar = curChar;
#endif
	filecol--;

	return true;
}
Exemple #4
0
/*!
 Writes \c str to the stream \c os as a JSON string value enclosed in
 double quotes.

 The string is converted to a byte buffer (UTF-8, or the c_str()
 representation in non-Unicode builds when \c m_noUtf8 is set) and the
 bytes are sent to the stream one at a time:

 \li quote, reverse solidus, solidus, backspace, formfeed, newline,
     carriage-return and tab are written as a two-character escape;
     the solidus is only escaped when wxJSONWRITER_ESCAPE_SOLIDUS is set,
     and newline / tab are written unescaped when
     wxJSONWRITER_MULTILINE_STRING is set
 \li any other byte below 32 is written as a six-character unicode escape
     (reverse solidus, 'u', four uppercase hex digits)
 \li every other byte is written unchanged

 When both wxJSONWRITER_STYLED and wxJSONWRITER_SPLIT_STRING are set the
 string may be split: the quotes are closed, a LF is written, the new line
 is indented with WriteIndent() and the quotes are reopened.

 If the conversion yields no buffer, an error message is written to the
 stream in place of the string content and the function returns ZERO.

 The function returns ZERO on success, -1 if the stream reports an error
 after a character was written, or the negative value returned by
 WriteIndent().
*/
int
wxJSONWriter::WriteStringValue( wxOutputStream& os, const wxString& str )
{
    os.PutC( '\"' );        // open quotes

    // choose the byte buffer to emit: UTF-8, or the ANSI c_str() when the
    // 'm_noUtf8' flag is set in a non-Unicode build
    wxCharBuffer utf8CB = str.ToUTF8();
#if !defined( wxJSON_USE_UNICODE )
    wxCharBuffer ansiCB( str.c_str());
    char* writeBuff = m_noUtf8 ? ansiCB.data() : utf8CB.data();
#else
    char* writeBuff = utf8CB.data();
#endif

    // NOTE (from the original author): in ANSI builds the UTF-8 conversion
    // may fail (see samples/test5.cpp, test 7.3) for an unknown reason
    if ( writeBuff == 0 )    {
        const char* err = "<wxJSONWriter::WriteStringValue(): error converting the string to a UTF8 buffer>";
        os.Write( err, strlen( err ));
        return 0;
    }

    const size_t len = strlen( writeBuff );

    // column at which the string starts: strings are only split when they
    // start within column wxJSONWRITER_LAST_COL
    // (see 'include/wx/json_defs.h' for the defines)
    const int startCol = m_colNo;

    for ( size_t pos = 0; pos < len; ++pos ) {
        const unsigned char ch = writeBuff[pos];

        // 'escCh' is the character that follows the reverse solidus in a
        // two-character escape; it stays ZERO for bytes that have none
        char escCh = 0;
        switch ( ch )  {
        case '\"' : escCh = '\"'; break;    // quotes
        case '\\' : escCh = '\\'; break;    // reverse solidus
        case '/'  : escCh = '/';  break;    // solidus
        case '\b' : escCh = 'b';  break;    // backspace
        case '\f' : escCh = 'f';  break;    // formfeed
        case '\n' : escCh = 'n';  break;    // newline
        case '\r' : escCh = 'r';  break;    // carriage-return
        case '\t' : escCh = 't';  break;    // horizontal tab
        default   : break;
        }
        bool shouldEscape = ( escCh != 0 );

        if ( !shouldEscape && ch < 32 )  {
            // control character without a short escape: numeric form
            char b[8];
            snprintf( b, 8, "\\u%04X", (int) ch );
            os.Write( b, 6 );
        }
        else {
            // writer flags may switch off some of the short escapes
            if ( ch == '/' && !( m_style & wxJSONWRITER_ESCAPE_SOLIDUS ) )  {
                shouldEscape = false;
            }
            if ( ( ch == '\n' || ch == '\t' ) && ( m_style & wxJSONWRITER_MULTILINE_STRING ) )  {
                shouldEscape = false;
            }

            if ( shouldEscape )  {
                os.PutC( '\\' );
                os.PutC( escCh );
            }
            else {
                // a normal char or a UTF-8 code unit: written as is
                os.PutC( ch );
            }
        }

        // whichever form was written, a stream failure ends the output
        if ( os.GetLastError() != wxSTREAM_NO_ERROR )    {
            return -1;
        }

        if ( (m_style & wxJSONWRITER_STYLED) && (m_style & wxJSONWRITER_SPLIT_STRING))   {
            // Split after a LF, or - once the split column is reached, the
            // string started early enough and enough bytes remain - after a
            // space or punctuation character.
            // BUG (noted by the original author): the column-based split does
            // not work because the columns are not counted
            const bool splitHere = ( ch == '\n' )
                    || ( (m_colNo >= wxJSONWRITER_SPLIT_COL)
                         && (startCol <= wxJSONWRITER_LAST_COL )
                         && ( IsSpace( ch ) || IsPunctuation( ch ))
                         && ( len - pos > wxJSONWRITER_MIN_LENGTH ));

            if ( splitHere )  {
                os.Write( "\"\n", 2 );                                  // close quotes and LF
                const int indentResult = WriteIndent( os, m_level + 2 );
                os.PutC( '\"' );                                        // reopen quotes
                if ( indentResult < 0 )  {
                    return indentResult;
                }
            }
        }
    }

    os.PutC( '\"' );    // close quotes
    return 0;
}
Exemple #5
0
//------------------------------------------------------------------------------
// Parse a number (integer or real), starting with curChar.
//
// A small state machine consumes digits, an optional '.', an optional 'E'/'e'
// and the signs that may precede the number or its exponent, appending every
// accepted character to `token`. Scanning stops at whitespace, at punctuation
// other than '-', or as soon as the machine reaches `bad` or `done`.
//
// Returns NUMBER when scanning ended in a numeric state. Otherwise the text is
// treated as a word that merely starts like a number (e.g. "00BW0762.1"): the
// remaining alphanumeric, '_' and '.' characters are appended (underscores as
// spaces) and STRING is returned. In both cases a punctuation character left in
// curChar is put back and, unless atEOL is set, filecol is stepped back.
Tokeniser::tokentype Tokeniser::ParseNumber ()
{
	// Every state is a distinct bit so that groups of states can be tested
	// with a single mask.
	enum {
		start		= 0x0001, // nothing read yet
		sign		= 0x0002, // leading sign read
		digit		= 0x0004, // reading the integer part
		fraction	= 0x0008, // decimal point read
		expsymbol	= 0x0010, // 'E' or 'e' read
		expsign		= 0x0020, // sign of the exponent read
		exponent 	= 0x0040, // reading the exponent digits
		bad			= 0x0080, // syntax error
		done		= 0x0100  // stopped at a '-'/'+' that follows digits
	} state;

	token = "";
	state = start;

	for (;;)
	{
		if (IsWhiteSpace (curChar))
			break;
		if (IsPunctuation (curChar) && (curChar != '-'))
			break;
		if ((state == bad) || (state == done))
			break;

		if (isdigit (curChar))
		{
			// a digit opens the integer part or the exponent; in every
			// other state it leaves the state unchanged
			if ((state == start) || (state == sign))
				state = digit;
			else if ((state == expsymbol) || (state == expsign))
				state = exponent;
		}
		else if ((curChar == '-') || (curChar == '+'))
		{
			if (state == start)
				state = sign;		// sign of number
			else if (state == digit)
				state = done;		// minus sign is punctuation, such as 6-10
			else if (state == expsymbol)
				state = expsign;	// sign of exponent
			else
				state = bad;		// syntax error
		}
		else if ((curChar == '.') && (state == digit))
			state = fraction;
		else if (((curChar == 'E') || (curChar == 'e')) && (state & (digit | fraction)))
			state = expsymbol;
		else
			state = bad;

		// the character is consumed only while the machine is still running
		if ((state != bad) && (state != done))
		{
			token += curChar;
			curChar = GetNextChar ();
		}
	}

	tokentype result;
	if (state & (digit | fraction | exponent | done))
	{
		// We have a number
		result = NUMBER;
	}
	else
	{
		// Not a number, but a string that starts with numbers, such as "00BW0762.1"
		do {
			if (curChar == '_')
				token += ' ';
			else
				token += curChar;
			curChar = GetNextChar ();
		} while (isalnum (curChar) || (curChar == '_') || (curChar == '.'));

		result = STRING; //classify the token
	}

	// a punctuation character that ended the token is handed back
	if (IsPunctuation (curChar))
	{
#ifdef __MWERKS__
		putBuffer = curChar;
#else
		in.putback (curChar);
#endif
		if (!atEOL)
			filecol--;
	}

	return result;
}
Exemple #6
0
//------------------------------------------------------------------------------
// Read characters until one token has been recognised and return its type.
//
// Whitespace is skipped. A punctuation character is classified on its own; a
// '[' hands over to ParseComment() and leaves the type EMPTY, so scanning goes
// on. A digit starts ParseNumber(); anything else starts ParseToken(). The loop
// also ends when the stream goes bad or atEOF is set, in which case EMPTY is
// returned. For every type other than STRING and NUMBER, `token` is set to the
// single character last read.
Tokeniser::tokentype Tokeniser::GetNextToken ()
{
	tokentype kind = EMPTY;

	while ((kind == EMPTY) && !in.bad() && !atEOF)
	{
		curChar = GetNextChar ();

		if (IsWhiteSpace (curChar))
			continue;	// skip white space

		if (!IsPunctuation (curChar))
		{
			// It's either a number, or a string
			if (isdigit (curChar))
				kind = ParseNumber();
			else
				kind = ParseToken () ? STRING : BAD;
			continue;
		}

		// classify punctuation token
		switch (curChar)
		{
			case '[':
				ParseComment ();	// type stays EMPTY, so the loop goes on
				break;
			case '\'':
				kind = ParseString () ? STRING : BAD;
				break;
			case '(':
			case '{':
				kind = LPAR;
				break;
			case ')':
			case '}':
				kind = RPAR;
				break;
			case '!':	kind = BANG;		break;
			case '#':	kind = HASH;		break;
			case '=':	kind = EQUALS;		break;
			case ';':	kind = SEMICOLON;	break;
			case ',':	kind = COMMA;		break;
			case '*':	kind = ASTERIX;		break;
			case ':':	kind = COLON;		break;
			case '-':	kind = MINUS;		break;
			case '"':	kind = DOUBLEQUOTE;	break;
			case '/':	kind = BACKSLASH;	break;	// note: '/' is reported as BACKSLASH
			default:	kind = OTHER;		break;
		}
	}

	// anything that is not a word or a number is represented by its character
	if ((kind != STRING) && (kind != NUMBER))
	{
		token = "";
		token += curChar;
	}
	return kind;
}
Exemple #7
0
/*----------------------------------------------------------------------------------------------------------------------
|	Reads characters from in until a complete token has been read and stored in token. GetNextToken performs a number 
|	of useful operations in the process of retrieving tokens:
|~
|	o any underscore characters encountered are stored as blank spaces (unless the labile flag bit preserveUnderscores
|	  is set)
|	o if the first character of the next token is an isolated single quote, then the entire quoted NxsString is saved 
|	  as the next token
|	o paired single quotes are automatically converted to single quotes before being stored
|	o comments are handled automatically (normal comments are treated as whitespace and output comments are passed to 
|	  the function OutputComment which does nothing in the NxsToken class but can be overridden in a derived class to 
|	  handle these in an appropriate fashion)
|	o leading whitespace (including comments) is automatically skipped
|	o if the end of the file is reached on reading this token, the atEOF flag is set and may be queried using the AtEOF 
|	  member function
|	o punctuation characters are always returned as individual tokens (see the Maddison, Swofford, and Maddison paper 
|	  for the definition of punctuation characters) unless the flag ignorePunctuation is set in labileFlags,
|	  in which case the normal punctuation symbols are treated just like any other darkspace character.
|~
|	The behavior of GetNextToken may be altered by using labile flags. For example, the labile flag saveCommandComments 
|	can be set using the member function SetLabileFlagBit. This will cause comments of the form [&X] to be saved as 
|	tokens (without the square brackets), but only for the aquisition of the next token. Labile flags are cleared after 
|	each application.
*/
void NxsToken::GetNextToken()
	{
	ResetToken();

	char ch = ' ';
	if (saved == '\0' || IsWhitespace(saved))
		{
		// Skip leading whitespace
		//
		while( IsWhitespace(ch) && !atEOF)
			ch = GetNextChar();
		saved = ch;
		}

	for(;;)
		{
		// Break now if singleCharacterToken mode on and token length > 0.
		//
		if (labileFlags & singleCharacterToken && token.size() > 0)
			break;

		// Get next character either from saved or from input stream.
		//
		if (saved != '\0')
			{
			ch = saved;
			saved = '\0';
			}
		else
			ch = GetNextChar();

		// Break now if we've hit EOF.
		//
		if (atEOF)
			break;

		if (ch == '\n' && labileFlags & newlineIsToken)
			{
			if (token.size() > 0)
				{
				// Newline came after token, save newline until next time when it will be 
				// reported as a separate token.
				//
				atEOL = 0;
				saved = ch;
				}
			else
				{
				atEOL = 1;
				AppendToToken(ch);
				}
			break;
			}

		else if (IsWhitespace(ch))
			{
			// Break only if we've begun adding to token (remember, if we hit a comment before a token,
			// there might be further white space between the comment and the next token).
			//
			if (token.size() > 0)
				break;
			}

		else if (ch == '_')
			{
			// If underscores are discovered in unquoted tokens, they should be 
			// automatically converted to spaces.
			//
			if (!(labileFlags & preserveUnderscores))
				ch = ' ';
			AppendToToken(ch);
			}

		else if (ch == '[')
			{
			// Get rest of comment and deal with it, but notice that we only break if the comment ends a token,
			// not if it starts one (comment counts as whitespace). In the case of command comments 
			// (if saveCommandComment) GetComment will add to the token NxsString, causing us to break because
			// token.size() will be greater than 0.
			comment.clear();
			GetComment();
			if (token.size() > 0)
			break;
			}

		else if (ch == '(' && labileFlags & parentheticalToken)
			{
			AppendToToken(ch);

			// Get rest of parenthetical token.
			//
			GetParentheticalToken();
			break;
			}

		else if (ch == '{' && labileFlags & curlyBracketedToken)
			{
			AppendToToken(ch);

			// Get rest of curly-bracketed token.
			//
			GetCurlyBracketedToken();
			break;
			}

		else if (ch == '\"' && labileFlags & doubleQuotedToken)
			{
			// Get rest of double-quoted token.
			//
			GetDoubleQuotedToken();
			break;
			}

		else if (ch == '\'')
			{
			if (token.size() > 0)
				{
				// We've encountered a single quote after a token has
				// already begun to be read; should be another tandem
				// single quote character immediately following.
				//
				ch = GetNextChar();
				if (ch == '\'')
					AppendToToken(ch);
				else
					{
					errormsg = "Expecting second single quote character";
					throw NxsException( errormsg, GetFilePosition(), GetFileLine(), GetFileColumn());
					}
				}
			else
				{
				// Get rest of quoted NEXUS word and break, since
				// we will have eaten one token after calling GetQuoted.
				//
				GetQuoted();
				}
			break;
			}

		else if (IsPunctuation(ch))
			{
			if (token.size() > 0)
				{
				// If we've already begun reading the token, encountering
				// a punctuation character means we should stop, saving
				// the punctuation character for the next token.
				//
				saved = ch;
				break;
				}
			else
				{
				// If we haven't already begun reading the token, encountering
				// a punctuation character means we should stop and return
				// the punctuation character as this token (i.e., the token
				// is just the single punctuation character.
				//
				AppendToToken(ch);
				break;
				}
			}

		else
			{
			AppendToToken(ch);
			}

		}

	labileFlags = 0;
	}