// Entry point for decoding a MessagePack stream into handler events.
//
// Remembers the handler, clears the previous parse error and any pending
// token, then brackets the decode between StartDocument() and EndDocument().
// An AnyRpcException raised while decoding is not propagated to the caller:
// the current stream offset is attached to it and it is recorded through
// SetParseError().
void MessagePackReader::ParseStream(Handler& handler)
{
    log_time(INFO);

    // Prepare the reader state for a fresh document.
    handler_ = &handler;
    parseError_.Clear();
    ResetToken();

    handler.StartDocument();

    // Decode the stream. The parameterless overload may be called recursively.
    try
    {
        ParseStream();
    }
    catch (AnyRpcException &parseFault)
    {
        log_error("catch exception, stream offset=" << is_.Tell());
        parseFault.SetOffset( is_.Tell() );
        SetParseError(parseFault);
    }

    // Reached on success and also after a caught AnyRpcException.
    handler.EndDocument();
}
// Reads a single byte from the stream, reports it to the handler as a signed
// integer and clears the current token.
// Raises AnyRpcErrorTermination (via anyrpc_throw) if the stream is already at
// its end.
void MessagePackReader::ParseInt8()
{
    log_trace();

    if (is_.Eof())
    {
        anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated");
    }

    // Reinterpret the raw byte as signed so values above 0x7f become negative.
    const signed char decoded = static_cast<signed char>(is_.Get());
    handler_->Int( decoded );
    ResetToken();
}
// Reads a 4-byte unsigned integer from the stream, passes it through
// _msgpack_be32 (byte-order conversion, judging by its name; defined elsewhere)
// and reports it to the handler. Clears the current token afterwards.
// Raises AnyRpcErrorTermination (via anyrpc_throw) if fewer than 4 bytes could
// be read.
void MessagePackReader::ParseUint32()
{
    log_trace();

    unsigned wireValue;
    const bool truncated = (is_.Read( reinterpret_cast<char*>(&wireValue), 4 ) != 4);
    if (truncated)
    {
        anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated");
    }

    const unsigned hostValue = _msgpack_be32( wireValue );
    handler_->Uint( hostValue );
    ResetToken();
}
// Reads an 8-byte signed integer from the stream, passes it through
// _msgpack_be64 (byte-order conversion, judging by its name; defined elsewhere)
// and reports it to the handler. Clears the current token afterwards.
// Raises AnyRpcErrorTermination (via anyrpc_throw) if fewer than 8 bytes could
// be read.
void MessagePackReader::ParseInt64()
{
    log_trace();

    int64_t wireValue;
    const bool truncated = (is_.Read( reinterpret_cast<char*>(&wireValue), 8 ) != 8);
    if (truncated)
    {
        anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated");
    }

    const int64_t hostValue = _msgpack_be64( wireValue );
    handler_->Int64( hostValue );
    ResetToken();
}
// Reads a 2-byte signed integer from the stream, passes it through
// _msgpack_be16 (byte-order conversion, judging by its name; defined elsewhere)
// and reports it to the handler. Clears the current token afterwards.
// Raises AnyRpcErrorTermination (via anyrpc_throw) if fewer than 2 bytes could
// be read.
void MessagePackReader::ParseInt16()
{
    log_trace();

    short wireValue;
    const bool truncated = (is_.Read( reinterpret_cast<char*>(&wireValue), 2 ) != 2);
    if (truncated)
    {
        anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated");
    }

    // Store back into a short so the sign is kept when the handler widens it.
    const short hostValue = _msgpack_be16( wireValue );
    handler_->Int( hostValue );
    ResetToken();
}
void MessagePackReader::ParseStr(size_t length) { log_debug("MessagePackReader::ParseStr: length=" << length); if (inSitu_) { // When parsing in place, mark the start position and continue with the decoding char* buffer = is_.PutBegin(); if (length != is_.Skip(length)) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); // Value expects all non-copied strings to be null terminated. // If you are already at the end of file, then you can't add the null terminator bool copy = copy_ || is_.Eof(); GetClearToken(); // get the next token and clear the source, i.e. terminate the string handler_->String(buffer,length,copy); is_.PutEnd(); } else if (length <= 256) { // Read short strings (probably the majority) onto the stack char buffer[257]; if (length != is_.Read(buffer, length)) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); buffer[length] = 0; handler_->String(buffer,length); ResetToken(); } else { // Long strings need an allocated buffer // It would be more efficient if the ownership of this buffer could be transferred to the handler char *buffer = static_cast<char*>(malloc(length+1)); if (!buffer) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); if (length != is_.Read(buffer, length)) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); buffer[length] = 0; handler_->String(buffer,length); ResetToken(); free(buffer); } }
// Decodes an array of `length` elements.
//
// Clears the current token, announces the array to the handler, decodes each
// element through ParseStream() and emits ArraySeparator() between consecutive
// elements (never after the last one). EndArray() closes the sequence.
void MessagePackReader::ParseArray(size_t length)
{
    log_debug("MessagePackReader::ParseArray, length=" << length);

    ResetToken();
    handler_->StartArray(length);

    // Count down so "more elements follow" is simply remaining > 1.
    for (size_t remaining = length; remaining > 0; --remaining)
    {
        ParseStream();
        if (remaining > 1)
        {
            handler_->ArraySeparator();
        }
    }

    handler_->EndArray(length);
}
// Decodes a map of `length` key/value pairs.
//
// Clears the current token, announces the map to the handler, then for each
// pair decodes the key with ParseKey() and the value with ParseStream().
// MapSeparator() is emitted between consecutive pairs (never after the last
// one). EndMap() closes the sequence.
void MessagePackReader::ParseMap(size_t length)
{
    log_debug("MessagePackReader::ParseMap, length=" << length);

    ResetToken();
    handler_->StartMap(length);

    // Count down so "more pairs follow" is simply remaining > 1.
    for (size_t remaining = length; remaining > 0; --remaining)
    {
        ParseKey();
        ParseStream();
        if (remaining > 1)
        {
            handler_->MapSeparator();
        }
    }

    handler_->EndMap(length);
}
void MessagePackReader::ParseBin(size_t length) { log_debug("MessagePackReader::ParseBin, length=" << length); if (inSitu_) { // When parsing in place, mark the start position and continue with the decoding char* buffer = is_.PutBegin(); if (length != is_.Skip(length)) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); handler_->Binary((unsigned char*)buffer,length,copy_); // binary data is not null terminated so it can be referenced ResetToken(); is_.PutEnd(); } else if (length <= 256) { // Read short binary data onto the stack char buffer[256]; if (length != is_.Read(buffer, length)) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); handler_->Binary((unsigned char*)buffer,length); ResetToken(); } else { // Long binary data needs an allocated buffer // It would be more efficient if the ownership of this buffer could be transferred to the handler char *buffer = static_cast<char*>(malloc(length)); if (!buffer) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); if (length != is_.Read(buffer, length)) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); handler_->Binary((unsigned char*)buffer,length); ResetToken(); free(buffer); } }
// Reads an 8-byte floating point value from the stream and reports it to the
// handler as a double. Clears the current token afterwards.
// Raises AnyRpcErrorTermination (via anyrpc_throw) if fewer than 8 bytes could
// be read.
void MessagePackReader::ParseFloat64()
{
    log_trace();

    // One storage cell viewed three ways: raw bytes for reading, a 64-bit
    // integer for _msgpack_be64, and the double handed to the handler.
    union
    {
        double  d;
        int64_t i64;
        char    str[8];
    } cell;

    const bool truncated = (is_.Read( cell.str, 8 ) != 8);
    if (truncated)
    {
        anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated");
    }

    cell.i64 = _msgpack_be64(cell.i64);
    handler_->Double( cell.d );
    ResetToken();
}
// Reads a 4-byte floating point value from the stream and reports it to the
// handler as a float. Clears the current token afterwards.
// Raises AnyRpcErrorTermination (via anyrpc_throw) if fewer than 4 bytes could
// be read.
void MessagePackReader::ParseFloat32()
{
    log_trace();

    // One storage cell viewed three ways: raw bytes for reading, an integer
    // for _msgpack_be32, and the float handed to the handler.
    union
    {
        float f;
        int   i;
        char  str[4];
    } cell;

    const bool truncated = (is_.Read( cell.str, 4 ) != 4);
    if (truncated)
    {
        anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated");
    }

    cell.i = _msgpack_be32(cell.i);
    handler_->Float( cell.f );
    ResetToken();
}
void MessagePackReader::ParseKey() { log_trace(); if (is_.Eof()) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); GetToken(); int length = -1; // only allow string keys although the MessagePack spec allows other types if ((token_ >= MessagePackFixStr) && (token_ < MessagePackNil)) length = token_ & 0x1f; else if (token_ == MessagePackStr8) { if (is_.Eof()) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); length = (unsigned char)is_.Get(); } else anyrpc_throw(AnyRpcErrorValueInvalid, "Invalid value"); if (inSitu_) { // When parsing in place, mark the start position and continue with the decoding char* buffer = is_.PutBegin(); if (length != is_.Skip(length)) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); GetClearToken(); // get the next token and clear the source, i.e. terminate the string handler_->Key(buffer,length,copy_); is_.PutEnd(); } else if (length > 0) { // Key strings can only be up to 256 characters char str[257]; if (is_.Read(str,length) != length) anyrpc_throw(AnyRpcErrorTermination, "Parsing was terminated"); str[length] = 0; handler_->Key(str,length); ResetToken(); } }
/*----------------------------------------------------------------------------------------------------------------------
|	Reads characters from in until a complete token has been read and stored in token. GetNextToken performs a number
|	of useful operations in the process of retrieving tokens:
|~
|	o any underscore characters encountered are stored as blank spaces (unless the labile flag bit preserveUnderscores
|	  is set)
|	o if the first character of the next token is an isolated single quote, then the entire quoted NxsString is saved
|	  as the next token
|	o paired single quotes are automatically converted to single quotes before being stored
|	o comments are handled automatically (normal comments are treated as whitespace and output comments are passed to
|	  the function OutputComment which does nothing in the NxsToken class but can be overridden in a derived class to
|	  handle these in an appropriate fashion)
|	o leading whitespace (including comments) is automatically skipped
|	o if the end of the file is reached on reading this token, the atEOF flag is set and may be queried using the AtEOF
|	  member function
|	o punctuation characters are always returned as individual tokens (see the Maddison, Swofford, and Maddison paper
|	  for the definition of punctuation characters) unless the flag ignorePunctuation is set in labileFlags,
|	  in which case the normal punctuation symbols are treated just like any other darkspace character.
|~
|	The behavior of GetNextToken may be altered by using labile flags. For example, the labile flag saveCommandComments
|	can be set using the member function SetLabileFlagBit. This will cause comments of the form [&X] to be saved as
|	tokens (without the square brackets), but only for the acquisition of the next token. Labile flags are cleared after
|	each application.
|
|	Throws NxsException if a single quote appears inside a token that has already been started and is not immediately
|	followed by a second single quote. On that path the function exits before labileFlags is reset.
*/
void NxsToken::GetNextToken()
{
    ResetToken();

    char ch = ' ';
    if (saved == '\0' || IsWhitespace(saved))
    {
        // Skip leading whitespace. The first non-whitespace character (or whatever
        // GetNextChar returned when atEOF was set) is parked in saved so the main
        // loop below picks it up first.
        while( IsWhitespace(ch) && !atEOF)
            ch = GetNextChar();
        saved = ch;
    }

    for(;;)
    {
        // Break now if singleCharacterToken mode on and token length > 0.
        if (labileFlags & singleCharacterToken && token.size() > 0)
            break;

        // Get next character either from saved or from input stream.
        if (saved != '\0')
        {
            ch = saved;
            saved = '\0';
        }
        else
            ch = GetNextChar();

        // Break now if we've hit EOF.
        if (atEOF)
            break;

        if (ch == '\n' && labileFlags & newlineIsToken)
        {
            if (token.size() > 0)
            {
                // Newline came after token, save newline until next time when it will be
                // reported as a separate token.
                atEOL = 0;
                saved = ch;
            }
            else
            {
                // The newline itself is the token.
                atEOL = 1;
                AppendToToken(ch);
            }
            break;
        }

        else if (IsWhitespace(ch))
        {
            // Break only if we've begun adding to token (remember, if we hit a comment before a token,
            // there might be further white space between the comment and the next token).
            if (token.size() > 0)
                break;
        }

        else if (ch == '_')
        {
            // If underscores are discovered in unquoted tokens, they should be
            // automatically converted to spaces.
            if (!(labileFlags & preserveUnderscores))
                ch = ' ';
            AppendToToken(ch);
        }

        else if (ch == '[')
        {
            // Get rest of comment and deal with it, but notice that we only break if the comment ends a token,
            // not if it starts one (comment counts as whitespace). In the case of command comments
            // (if saveCommandComment) GetComment will add to the token NxsString, causing us to break because
            // token.size() will be greater than 0.
            comment.clear();
            GetComment();
            if (token.size() > 0)
                break;
        }

        else if (ch == '(' && labileFlags & parentheticalToken)
        {
            // The opening parenthesis is kept as part of the token.
            AppendToToken(ch);

            // Get rest of parenthetical token.
            GetParentheticalToken();
            break;
        }

        else if (ch == '{' && labileFlags & curlyBracketedToken)
        {
            // The opening brace is kept as part of the token.
            AppendToToken(ch);

            // Get rest of curly-bracketed token.
            GetCurlyBracketedToken();
            break;
        }

        else if (ch == '\"' && labileFlags & doubleQuotedToken)
        {
            // Get rest of double-quoted token. Unlike the '(' and '{' cases above, the
            // opening quote character is not appended here.
            GetDoubleQuotedToken();
            break;
        }

        else if (ch == '\'')
        {
            if (token.size() > 0)
            {
                // We've encountered a single quote after a token has
                // already begun to be read; should be another tandem
                // single quote character immediately following.
                ch = GetNextChar();
                if (ch == '\'')
                    AppendToToken(ch);
                else
                {
                    errormsg = "Expecting second single quote character";
                    throw NxsException( errormsg, GetFilePosition(), GetFileLine(), GetFileColumn());
                }
            }
            else
            {
                // Get rest of quoted NEXUS word and break, since
                // we will have eaten one token after calling GetQuoted.
                GetQuoted();
            }
            // NOTE(review): this break is also reached after a doubled quote has been
            // appended, so the token ends right after that quote - confirm this is intended.
            break;
        }

        else if (IsPunctuation(ch))
        {
            if (token.size() > 0)
            {
                // If we've already begun reading the token, encountering
                // a punctuation character means we should stop, saving
                // the punctuation character for the next token.
                saved = ch;
                break;
            }
            else
            {
                // If we haven't already begun reading the token, encountering
                // a punctuation character means we should stop and return
                // the punctuation character as this token (i.e., the token
                // is just the single punctuation character).
                AppendToToken(ch);
                break;
            }
        }

        else
        {
            // Any other character simply extends the current token.
            AppendToToken(ch);
        }
    }

    // Labile flags apply to a single token only.
    labileFlags = 0;
}
// Reports a null value to the handler, then clears the current token.
// No bytes are read from the stream here.
void MessagePackReader::ParseNull()
{
    log_trace();

    handler_->Null();
    ResetToken();
}
// Reports a boolean true to the handler, then clears the current token.
// No bytes are read from the stream here.
void MessagePackReader::ParseTrue()
{
    log_trace();

    handler_->BoolTrue();
    ResetToken();
}
// Reports the current token itself as an unsigned integer, then clears the
// token. The value is carried in the token, so no further bytes are read from
// the stream.
void MessagePackReader::ParsePositiveFixInt()
{
    log_trace();

    const unsigned fixValue = static_cast<unsigned>(token_);
    handler_->Uint( fixValue );
    ResetToken();
}
// Reports the current token itself as a signed integer, then clears the token.
// The token is narrowed to signed char before being passed to the handler.
// No further bytes are read from the stream.
void MessagePackReader::ParseNegativeFixInt()
{
    log_trace();

    const signed char fixValue = static_cast<signed char>(token_);
    handler_->Int( fixValue );
    ResetToken();
}