/*
 * Strip punctuation (as classified by IsPunctuation) from both ends of a
 * NUL-terminated wide string, in place. A NULL pointer or an empty string
 * is left untouched.
 */
static VOID
TrimPunctuation(
    _Inout_ PWSTR pStr)
{
    SIZE_T Remaining;
    UINT Skip;

    if (!pStr)
        return;

    Remaining = wcslen(pStr);
    if (Remaining == 0)
        return;

    /* Count the punctuation characters at the front of the string */
    for (Skip = 0; Skip < Remaining; Skip++)
    {
        if (!IsPunctuation(pStr[Skip]))
            break;
    }

    /* Slide what is left (terminator included) down to the start */
    if (Skip != 0)
    {
        Remaining -= Skip;
        memmove(pStr, &pStr[Skip], (Remaining + 1) * sizeof(WCHAR));
    }

    /* Replace trailing punctuation with terminators, working backwards */
    while (Remaining != 0 && IsPunctuation(pStr[Remaining - 1]))
    {
        Remaining--;
        pStr[Remaining] = L'\0';
    }
}
//----------------------------------------------------------------------------- // Update scan codes for foreign keyboards //----------------------------------------------------------------------------- void ButtonCode_UpdateScanCodeLayout( ) { // reset the keyboard memcpy( s_pScanToButtonCode, s_pScanToButtonCode_QWERTY, sizeof(s_pScanToButtonCode) ); #if !defined( _X360 ) // fix up keyboard layout for other languages HKL currentKb = ::GetKeyboardLayout( 0 ); HKL englishKb = ::LoadKeyboardLayout("00000409", 0); if (englishKb && englishKb != currentKb) { for ( int i = 0; i < ARRAYSIZE(s_pScanToButtonCode); i++ ) { // take the english/QWERTY ButtonCode_t code = s_pScanToButtonCode_QWERTY[ i ]; // only remap printable keys if ( code != KEY_NONE && code != KEY_BACKQUOTE && ( IsAlphaNumeric( code ) || IsPunctuation( code ) ) ) { // get it's virtual key based on the old layout int vk = ::MapVirtualKeyEx( i, 1, englishKb ); // turn in into a scancode on the new layout int newScanCode = ::MapVirtualKeyEx( vk, 0, currentKb ); // strip off any high bits newScanCode &= 0x0000007F; // set in the new layout s_pScanToButtonCode[newScanCode] = code; } } } s_pScanToButtonCode[0] = KEY_NONE; #endif }
//------------------------------------------------------------------------------ bool Tokeniser::ParseToken () { token = ""; while ((curChar != '\0') && !IsWhiteSpace(curChar) && !IsPunctuation(curChar)) { if (curChar == '_') token += ' '; else token += curChar; curChar = GetNextChar (); } if (!atEOL) { #ifdef __MWERKS__ putBuffer = curChar; #else putBackChar = curChar; //in.putback (curChar); #endif filecol--; } return true; }
/*! The function writes the string \c str to the output object that was specified in the wxJSONWriter::Write() function. The function may split strings in two or more lines if the string contains LF characters if the \c m_style data member contains the wxJSONWRITER_SPLIT_STRING flag. The function does not actually write the string: for every character in the provided string the function calls WriteChar() which does the actual character output. The function returns ZERO on success or -1 in case of errors. */ int wxJSONWriter::WriteStringValue( wxOutputStream& os, const wxString& str ) { // JSON values of type STRING are written by converting the whole string // to UTF-8 and then copying the UTF-8 buffer to the 'os' stream // one byte at a time and processing them os.PutC( '\"' ); // open quotes // the buffer that has to be written is either UTF-8 or ANSI c_str() depending // on the 'm_noUtf8' flag char* writeBuff = 0; wxCharBuffer utf8CB = str.ToUTF8(); // the UTF-8 buffer #if !defined( wxJSON_USE_UNICODE ) wxCharBuffer ansiCB( str.c_str()); // the ANSI buffer if ( m_noUtf8 ) { writeBuff = ansiCB.data(); } else { writeBuff = utf8CB.data(); } #else writeBuff = utf8CB.data(); #endif // NOTE: in ANSI builds UTF-8 conversion may fail (see samples/test5.cpp, // test 7.3) although I do not know why if ( writeBuff == 0 ) { const char* err = "<wxJSONWriter::WriteStringValue(): error converting the string to a UTF8 buffer>"; os.Write( err, strlen( err )); return 0; } size_t len = strlen( writeBuff ); int lastChar = 0; // store the column at which the string starts // splitting strings only happen if the string starts within // column wxJSONWRITER_LAST_COL (default 50) // see 'include/wx/json_defs.h' for the defines int tempCol = m_colNo; // now write the UTF8 buffer processing the bytes size_t i; for ( i = 0; i < len; i++ ) { bool shouldEscape = false; unsigned char ch = *writeBuff; ++writeBuff; // point to the next byte // the escaped character char escCh = 0; // for every 
character we have to check if it is a character that // needs to be escaped: note that characters that should be escaped // may be not if some writer's flags are specified switch ( ch ) { case '\"' : // quotes shouldEscape = true; escCh = '\"'; break; case '\\' : // reverse solidus shouldEscape = true; escCh = '\\'; break; case '/' : // solidus shouldEscape = true; escCh = '/'; break; case '\b' : // backspace shouldEscape = true; escCh = 'b'; break; case '\f' : // formfeed shouldEscape = true; escCh = 'f'; break; case '\n' : // newline shouldEscape = true; escCh = 'n'; break; case '\r' : // carriage-return shouldEscape = true; escCh = 'r'; break; case '\t' : // horizontal tab shouldEscape = true; escCh = 't'; break; default : shouldEscape = false; break; } // end switch // if the character is a control character that is not identified by a // lowercase letter, we should escape it if ( !shouldEscape && ch < 32 ) { char b[8]; snprintf( b, 8, "\\u%04X", (int) ch ); os.Write( b, 6 ); if ( os.GetLastError() != wxSTREAM_NO_ERROR ) { return -1; } } // the char is not a control character else { // some characters that should be escaped are not escaped // if the writer was constructed with some flags if ( shouldEscape && !( m_style & wxJSONWRITER_ESCAPE_SOLIDUS) ) { if ( ch == '/' ) { shouldEscape = false; } } if ( shouldEscape && (m_style & wxJSONWRITER_MULTILINE_STRING)) { if ( ch == '\n' || ch == '\t' ) { shouldEscape = false; } } // now write the character prepended by ESC if it should be escaped if ( shouldEscape ) { os.PutC( '\\' ); os.PutC( escCh ); if ( os.GetLastError() != wxSTREAM_NO_ERROR ) { return -1; } } else { // a normal char or a UTF-8 units: write the character os.PutC( ch ); if ( os.GetLastError() != wxSTREAM_NO_ERROR ) { return -1; } } } // check if SPLIT_STRING flag is set and if the string has to // be splitted if ( (m_style & wxJSONWRITER_STYLED) && (m_style & wxJSONWRITER_SPLIT_STRING)) { // split the string if the character written is LF if ( ch == 
'\n' ) { // close quotes and CR os.Write( "\"\n", 2 ); lastChar = WriteIndent( os, m_level + 2 ); // write indentation os.PutC( '\"' ); // reopen quotes if ( lastChar < 0 ) { return lastChar; } } // split the string only if there is at least wxJSONWRITER_MIN_LENGTH // character to write and the character written is a punctuation or space // BUG: the following does not work because the columns are not counted else if ( (m_colNo >= wxJSONWRITER_SPLIT_COL) && (tempCol <= wxJSONWRITER_LAST_COL )) { if ( IsSpace( ch ) || IsPunctuation( ch )) { if ( len - i > wxJSONWRITER_MIN_LENGTH ) { // close quotes and CR os.Write( "\"\n", 2 ); lastChar = WriteIndent( os, m_level + 2 ); // write indentation os.PutC( '\"' ); // reopen quotes if ( lastChar < 0 ) { return lastChar; } } } } } } // end for os.PutC( '\"' ); // close quotes return 0; }
//------------------------------------------------------------------------------ // Parse a number (integer or real). Tokeniser::tokentype Tokeniser::ParseNumber () { enum { start = 0x0001, // 0 sign = 0x0002, // 1 digit = 0x0004, // 2 fraction = 0x0008, // 3 expsymbol = 0x0010, // 4 expsign = 0x0020, // 5 exponent = 0x0040, // 6 bad = 0x0080, done = 0x0100 } state; tokentype result = BAD; token = ""; state = start; while (!IsWhiteSpace (curChar) && !(IsPunctuation (curChar) && (curChar != '-')) && (state != bad) && (state != done)) { if (isdigit (curChar)) { switch (state) { case start: case sign: state = digit; break; case expsymbol: case expsign: state = exponent; break; default: break; } } else if ((curChar == '-') || (curChar == '+')) { switch (state) { case start: state = sign; // sign of number break; case digit: state = done; // minus sign is punctuation, such as 6-10 break; case expsymbol: state = expsign; // sign of exponent break; default: state = bad; // syntax error break; } } else if ((curChar == '.') && (state == digit)) state = fraction; else if (((curChar == 'E') || (curChar == 'e')) && (state & (digit | fraction))) state = expsymbol; else state = bad; if ((state != bad) && (state != done)) { token += curChar; curChar = GetNextChar (); } } int isNumber = state & (digit | fraction | exponent | done); if (isNumber) { // We have a number result = NUMBER; if (IsPunctuation (curChar)) { #ifdef __MWERKS__ putBuffer = curChar; #else in.putback (curChar); #endif if (!atEOL) filecol--; } } else { // Not a number, but a string that starts with numbers, such as "00BW0762.1" do { if (curChar == '_') token += ' '; else token += curChar; curChar = GetNextChar (); } while (isalnum (curChar) || (curChar == '_') || (curChar == '.')); if (IsPunctuation (curChar)) { #ifdef __MWERKS__ putBuffer = curChar; #else in.putback (curChar); #endif if (!atEOL) filecol--; } result = STRING; //classify the token } return result; }
//------------------------------------------------------------------------------ Tokeniser::tokentype Tokeniser::GetNextToken () { tokentype TokenType = EMPTY; while ((TokenType == EMPTY) && !in.bad() && !atEOF) { curChar = GetNextChar (); if (IsWhiteSpace (curChar)) { // skip white space } else { if (IsPunctuation (curChar)) { // classify punctuation token switch (curChar) { case '[': ParseComment (); break; case '\'': if (ParseString ()) TokenType = STRING; else TokenType = BAD; break; case '(': TokenType = LPAR; break; case ')': TokenType = RPAR; break; case '{': TokenType = LPAR; break; case '}': TokenType = RPAR; break; case '!': TokenType = BANG; break; case '#': TokenType = HASH; break; case '=': TokenType = EQUALS; break; case ';': TokenType = SEMICOLON; break; case ',': TokenType = COMMA; break; case '*': TokenType = ASTERIX; break; case ':': TokenType = COLON; break; case '-': TokenType = MINUS; break; case '"': TokenType = DOUBLEQUOTE; break; case '/': TokenType = BACKSLASH; break; default: TokenType = OTHER; break; } } else { // It's either a number, or a string if (isdigit (curChar)) { TokenType = ParseNumber(); } else { if (ParseToken ()) TokenType = STRING; else TokenType = BAD; } } } } if ((TokenType != STRING) && (TokenType != NUMBER)) { token = ""; token += curChar; } return TokenType; }
/*---------------------------------------------------------------------------------------------------------------------- | Reads characters from in until a complete token has been read and stored in token. GetNextToken performs a number | of useful operations in the process of retrieving tokens: |~ | o any underscore characters encountered are stored as blank spaces (unless the labile flag bit preserveUnderscores | is set) | o if the first character of the next token is an isolated single quote, then the entire quoted NxsString is saved | as the next token | o paired single quotes are automatically converted to single quotes before being stored | o comments are handled automatically (normal comments are treated as whitespace and output comments are passed to | the function OutputComment which does nothing in the NxsToken class but can be overridden in a derived class to | handle these in an appropriate fashion) | o leading whitespace (including comments) is automatically skipped | o if the end of the file is reached on reading this token, the atEOF flag is set and may be queried using the AtEOF | member function | o punctuation characters are always returned as individual tokens (see the Maddison, Swofford, and Maddison paper | for the definition of punctuation characters) unless the flag ignorePunctuation is set in labileFlags, | in which case the normal punctuation symbols are treated just like any other darkspace character. |~ | The behavior of GetNextToken may be altered by using labile flags. For example, the labile flag saveCommandComments | can be set using the member function SetLabileFlagBit. This will cause comments of the form [&X] to be saved as | tokens (without the square brackets), but only for the aquisition of the next token. Labile flags are cleared after | each application. 
*/ void NxsToken::GetNextToken() { ResetToken(); char ch = ' '; if (saved == '\0' || IsWhitespace(saved)) { // Skip leading whitespace // while( IsWhitespace(ch) && !atEOF) ch = GetNextChar(); saved = ch; } for(;;) { // Break now if singleCharacterToken mode on and token length > 0. // if (labileFlags & singleCharacterToken && token.size() > 0) break; // Get next character either from saved or from input stream. // if (saved != '\0') { ch = saved; saved = '\0'; } else ch = GetNextChar(); // Break now if we've hit EOF. // if (atEOF) break; if (ch == '\n' && labileFlags & newlineIsToken) { if (token.size() > 0) { // Newline came after token, save newline until next time when it will be // reported as a separate token. // atEOL = 0; saved = ch; } else { atEOL = 1; AppendToToken(ch); } break; } else if (IsWhitespace(ch)) { // Break only if we've begun adding to token (remember, if we hit a comment before a token, // there might be further white space between the comment and the next token). // if (token.size() > 0) break; } else if (ch == '_') { // If underscores are discovered in unquoted tokens, they should be // automatically converted to spaces. // if (!(labileFlags & preserveUnderscores)) ch = ' '; AppendToToken(ch); } else if (ch == '[') { // Get rest of comment and deal with it, but notice that we only break if the comment ends a token, // not if it starts one (comment counts as whitespace). In the case of command comments // (if saveCommandComment) GetComment will add to the token NxsString, causing us to break because // token.size() will be greater than 0. comment.clear(); GetComment(); if (token.size() > 0) break; } else if (ch == '(' && labileFlags & parentheticalToken) { AppendToToken(ch); // Get rest of parenthetical token. // GetParentheticalToken(); break; } else if (ch == '{' && labileFlags & curlyBracketedToken) { AppendToToken(ch); // Get rest of curly-bracketed token. 
// GetCurlyBracketedToken(); break; } else if (ch == '\"' && labileFlags & doubleQuotedToken) { // Get rest of double-quoted token. // GetDoubleQuotedToken(); break; } else if (ch == '\'') { if (token.size() > 0) { // We've encountered a single quote after a token has // already begun to be read; should be another tandem // single quote character immediately following. // ch = GetNextChar(); if (ch == '\'') AppendToToken(ch); else { errormsg = "Expecting second single quote character"; throw NxsException( errormsg, GetFilePosition(), GetFileLine(), GetFileColumn()); } } else { // Get rest of quoted NEXUS word and break, since // we will have eaten one token after calling GetQuoted. // GetQuoted(); } break; } else if (IsPunctuation(ch)) { if (token.size() > 0) { // If we've already begun reading the token, encountering // a punctuation character means we should stop, saving // the punctuation character for the next token. // saved = ch; break; } else { // If we haven't already begun reading the token, encountering // a punctuation character means we should stop and return // the punctuation character as this token (i.e., the token // is just the single punctuation character. // AppendToToken(ch); break; } } else { AppendToToken(ch); } } labileFlags = 0; }