// Consumes one character of a JSON string body.
//
// Returns true when `ch` is the unescaped closing double quote; in every
// other case the character is processed and false is returned. Decoded
// characters are appended to _str. Unknown escape characters and non-hex
// digits inside a \uXXXX sequence are reported through
// _jsonParser->doThrow().
bool JsonParser::JsonStringParser::advance(Char ch)
{
    if (_state == state_0)
    {
        // Plain string content: either the end, the start of an escape,
        // or a literal character.
        if (ch == '"')
            return true;

        if (ch == '\\')
            _state = state_esc;
        else
            _str += ch;

        return false;
    }

    if (_state == state_esc)
    {
        // Every escape except \u is finished after this one character,
        // so go back to plain content by default.
        _state = state_0;

        if (ch == 'u')
        {
            // \uXXXX: collect four hex digits into _value.
            _value = 0;
            _count = 4;
            _state = state_hex;
            return false;
        }

        if (ch == '"' || ch == '\\' || ch == '/')
        {
            // These escapes stand for themselves.
            _str += ch;
            return false;
        }

        char decoded;
        if (ch == 'b')
            decoded = '\b';
        else if (ch == 'f')
            decoded = '\f';
        else if (ch == 'n')
            decoded = '\n';
        else if (ch == 'r')
            decoded = '\r';
        else if (ch == 't')
            decoded = '\t';
        else
        {
            _jsonParser->doThrow(std::string("invalid character '") + ch.narrow() + "' in string");
            return false;
        }

        _str += decoded;
        return false;
    }

    if (_state == state_hex)
    {
        // Shift the next hex digit into the accumulated code unit.
        if (ch >= 'A' && ch <= 'F')
            _value = (_value << 4) | (ch.value() - 'A' + 10);
        else if (ch >= 'a' && ch <= 'f')
            _value = (_value << 4) | (ch.value() - 'a' + 10);
        else if (ch >= '0' && ch <= '9')
            _value = (_value << 4) | (ch.value() - '0');
        else
            _jsonParser->doThrow(std::string("invalid character '") + ch.narrow() + "' in hex sequence");

        --_count;
        if (_count == 0)
        {
            // All four digits seen: emit the character and resume plain content.
            _str += Char(static_cast<wchar_t>(_value));
            _state = state_0;
        }

        return false;
    }

    return false;
}
// Feeds one character into the JSON state machine.
//
// Return value, as used by the nested-parser calls in this function:
//    0  the value is not complete yet; more input is needed
//    1  the value is complete and `ch` was consumed as part of it
//   -1  the value is complete but `ch` does not belong to it; the caller
//       still has to interpret `ch` itself
//
// Parsed data is reported to _deserializer. Object members and array
// elements are parsed by a nested JsonParser (_next), which is allocated on
// first use. Besides strict JSON the code also accepts unquoted member
// names, a leading '+' on numbers, and // and /* */ comments between tokens.
//
// A JsonParserError passing through this function gets its _lineNo set to
// the current line before it is rethrown.
//
// NOTE(review): std::isspace/std::isalpha/std::isalnum from <cctype> are only
// defined for arguments representable as unsigned char (or EOF) - confirm
// that ch.value() cannot exceed that range for non-ASCII input.
int JsonParser::advance(Char ch)
{
    int ret;

    // line counter, used for error reporting in the catch block below
    if (ch == '\n')
        ++_lineNo;

    try
    {
        switch (_state)
        {
            // start of a value: the first significant character selects its kind
            case state_0:
                if (ch == '{')
                {
                    _state = state_object;
                    _deserializer->setCategory(SerializationInfo::Object);
                }
                else if (ch == '[')
                {
                    _state = state_array;
                    _deserializer->setCategory(SerializationInfo::Array);
                }
                else if (ch == '"')
                {
                    _state = state_string;
                    _deserializer->setCategory(SerializationInfo::Value);
                }
                else if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-')
                {
                    _token = ch;
                    _state = state_number;
                    _deserializer->setCategory(SerializationInfo::Value);
                }
                else if (ch == '/')
                {
                    // comment; remember where to continue afterwards
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                {
                    // anything else starts a bare token (true/false/null)
                    _token = ch;
                    _state = state_token;
                }
                break;

            // directly after '{': expect a member name or '}'
            case state_object:
                if (ch == '"')
                {
                    _state = state_object_name;
                    _stringParser.clear();
                }
                else if (ch == '}')
                    return 1;
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (std::isalpha(ch.value()))
                {
                    // unquoted member name
                    _token = ch;
                    _state = state_object_plainname;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // inside an unquoted member name
            case state_object_plainname:
                if (std::isalnum(ch.value()))
                    _token += ch;
                else if (std::isspace(ch.value()))
                {
                    // name finished; the ':' is still to come
                    _stringParser.str(_token);
                    _state = state_object_after_name;
                }
                else if (ch == ':')
                {
                    // name finished and separator seen: start parsing the value
                    _stringParser.str(_token);
                    if (_next == 0)
                        _next = new JsonParser();
                    log_debug("begin object member " << _stringParser.str());
                    _deserializer->beginMember(Utf8Codec::encode(_stringParser.str()),
                        std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    _stringParser.clear();
                    _state = state_object_value;
                }
                else
                    throwInvalidCharacter(ch);
                break;

            // inside a quoted member name; _stringParser reports the closing quote
            case state_object_name:
                if (_stringParser.advance(ch))
                    _state = state_object_after_name;
                break;

            // member name complete: expect ':'
            case state_object_after_name:
                if (ch == ':')
                {
                    if (_next == 0)
                        _next = new JsonParser();
                    log_debug("begin object member " << _stringParser.str());
                    _deserializer->beginMember(Utf8Codec::encode(_stringParser.str()),
                        std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    _stringParser.clear();
                    _state = state_object_value;
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // member value is parsed by the nested parser
            case state_object_value:
                ret = _next->advance(ch);
                if (ret != 0)
                {
                    log_debug("leave member");
                    _deserializer->leaveMember();
                    _state = state_object_e;
                }
                if (ret != -1)
                    break;
                // ret == -1: the nested parser did not consume ch, so it is
                // interpreted here - intentional fallthrough

            // after a member value: expect ',' or '}'
            case state_object_e:
                if (ch == ',')
                    _state = state_object_next_member;
                else if (ch == '}')
                    return 1;
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // after ',': expect the next member name ('}' is not accepted here)
            case state_object_next_member:
                if (ch == '"')
                {
                    _state = state_object_name;
                    _stringParser.clear();
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (std::isalpha(ch.value()))
                {
                    _token = ch;
                    _state = state_object_plainname;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // directly after '[': expect the first element or ']'
            case state_array:
                if (ch == ']')
                {
                    return 1;
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                {
                    // first character of the first element; it is handed to
                    // the nested parser right away (its result is ignored here)
                    if (_next == 0)
                        _next = new JsonParser();
                    log_debug("begin array member");
                    _deserializer->beginMember(std::string(), std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    _next->advance(ch);
                    _state = state_array_value;
                }
                break;

            // array element is parsed by the nested parser
            case state_array_value:
                ret = _next->advance(ch);
                if (ret != 0)
                    _state = state_array_e;
                if (ret != -1)
                    break;
                // ret == -1: ch was not consumed by the element - intentional
                // fallthrough

            // after an element: expect ',' or ']'; the element's member is
            // left here rather than in state_array_value
            case state_array_e:
                if (ch == ']')
                {
                    log_debug("leave member");
                    _deserializer->leaveMember();
                    return 1;
                }
                else if (ch == ',')
                {
                    log_debug("leave member");
                    _deserializer->leaveMember();

                    log_debug("begin array member");
                    _deserializer->beginMember(std::string(), std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    _state = state_array_value;
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // string value; _stringParser returns true on the closing quote
            case state_string:
                if (_stringParser.advance(ch))
                {
                    log_debug("set string value \"" << _stringParser.str() << '"');
                    _deserializer->setValue(_stringParser.str());
                    _deserializer->setTypeName("string");
                    _stringParser.clear();
                    _state = state_end;
                    return 1;
                }
                break;

            // integer so far; '.', 'e' or 'E' turns it into a float
            case state_number:
                if (std::isspace(ch.value()))
                {
                    // whitespace ends the number and is consumed
                    log_debug("set int value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("int");
                    _token.clear();
                    return 1;
                }
                else if (ch == '.' || ch == 'e' || ch == 'E')
                {
                    _token += ch;
                    _state = state_float;
                }
                else if (ch >= '0' && ch <= '9')
                {
                    _token += ch;
                }
                else
                {
                    // any other character ends the number but is not part of it
                    log_debug("set int value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("int");
                    _token.clear();
                    return -1;
                }
                break;

            // floating point number; the collected text is passed on unvalidated
            case state_float:
                if (std::isspace(ch.value()))
                {
                    log_debug("set double value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("double");
                    _token.clear();
                    return 1;
                }
                else if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-'
                    || ch == '.' || ch == 'e' || ch == 'E')
                    _token += ch;
                else
                {
                    log_debug("set double value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("double");
                    _token.clear();
                    return -1;
                }
                break;

            // bare token; letters are collected in lower case
            case state_token:
                if (std::isalpha(ch.value()))
                    _token += Char(std::tolower(ch));
                else
                {
                    if (_token == "true" || _token == "false")
                    {
                        log_debug("set bool value \"" << _token << '"');
                        _deserializer->setValue(_token);
                        _deserializer->setTypeName("bool");
                        _token.clear();
                    }
                    else if (_token == "null")
                    {
                        log_debug("set null value \"" << _token << '"');
                        _deserializer->setTypeName("null");
                        _deserializer->setNull();
                        _token.clear();
                    }

                    // NOTE(review): a token that is none of true/false/null
                    // sets nothing on the deserializer and is not reported as
                    // an error - confirm this leniency is intended.
                    return -1;
                }
                break;

            // a '/' was seen: decide between a line comment and a block comment
            case state_comment0:
                if (ch == '/')
                    _state = state_commentline;
                else if (ch == '*')
                    _state = state_comment;
                else
                    throwInvalidCharacter(ch);
                break;

            // line comment: runs up to the end of the line
            case state_commentline:
                if (ch == '\n')
                    _state = _nextState;
                break;

            // block comment: wait for a '*'
            case state_comment:
                if (ch == '*')
                    _state = state_comment_e;
                break;

            // block comment after '*': a '/' ends it, another '*' keeps waiting
            case state_comment_e:
                if (ch == '/')
                    _state = _nextState;
                else if (ch != '*')
                    _state = state_comment;
                break;

            // after a complete string value: only whitespace and comments allowed
            case state_end:
                if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    doThrow(std::string("unexpected character '") + ch.narrow() + "\' after end");
                break;
        }
    }
    catch (JsonParserError& e)
    {
        // caught by non-const reference on purpose: the line number is
        // written into the error before it travels on
        e._lineNo = _lineNo;
        throw;
    }

    return 0;
}
// Reports `ch` as an invalid character by passing the message
// "invalid character '<ch>'" to doThrow().
void JsonParser::throwInvalidCharacter(Char ch)
{
    std::string message("invalid character '");
    message += ch.narrow();
    message += '\'';
    doThrow(message);
}
// Feeds one character into the CSV state machine.
//
// The first line is read as the title row. While reading the first title the
// delimiter is detected: the first character that is neither alphanumeric,
// '_', ' ', a quote nor a line end becomes _delimiter. Titles and data may be
// enclosed in single or double quotes; the quote character that opened an
// item also has to close it.
//
// Every following line is reported to _deserializer as an unnamed Array
// member that contains one Value member per column, named after the matching
// title (or unnamed when there are more columns than titles). At the end of a
// row checkNoColumns() is called with the index of the last column, the
// number of title columns and the line counter.
//
// Malformed titles, and characters following the closing quote of a quoted
// value that are neither the delimiter nor a line end, are reported through
// SerializationError::doThrow().
void CsvParser::advance(Char ch)
{
    if (ch == L'\n')
        ++_lineNo;

    switch (_state)
    {
        // first title; the delimiter is not known yet
        case state_detectDelim:
            if (isalnum(ch) || ch == L'_' || ch == L' ')
            {
                _titles.back() += ch.narrow();
            }
            else if (ch == L'\n' || ch == L'\r')
            {
                // line ended without a delimiter: single column file
                log_debug("title=\"" << _titles.back() << '"');
                _noColumns = 1;
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == L'\'' || ch == L'"')
            {
                _quote = ch;
                _state = state_detectDelim_q;
            }
            else
            {
                // first "other" character is taken as the delimiter
                _delimiter = ch;
                log_debug("delimiter=" << _delimiter.narrow());
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
                _state = state_title;
            }
            break;

        // inside the quoted first title
        case state_detectDelim_q:
            if (ch == _quote)
            {
                _state = state_detectDelim_postq;
            }
            else
            {
                _titles.back() += ch.narrow();
            }
            break;

        // after the closing quote of the first title: the next character has
        // to be a line end or the delimiter
        case state_detectDelim_postq:
            if (isalnum(ch) || ch == L'_' || ch == L'\'' || ch == L'"' || ch == L' ')
            {
                std::ostringstream msg;
                msg << "invalid character '" << ch.narrow()
                    << "' within csv title of column " << _titles.size();
                SerializationError::doThrow(msg.str());
            }
            else if (ch == L'\n' || ch == L'\r')
            {
                log_debug("title=\"" << _titles.back() << '"');
                _noColumns = 1;
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else
            {
                _delimiter = ch;
                log_debug("delimiter=" << _delimiter.narrow());
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
                _state = state_title;
            }
            break;

        // second and following titles (delimiter known)
        case state_title:
            if (ch == L'\n' || ch == L'\r')
            {
                log_debug("title=\"" << _titles.back() << '"');
                _state = (ch == L'\r' ? state_cr : state_rowstart);
                _noColumns = _titles.size();
            }
            else if (ch == _delimiter)
            {
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
            }
            else if (ch == L'\'' || ch == L'\"')
            {
                // a quote is only accepted as the very first character of a title
                if (_titles.back().empty())
                {
                    _quote = ch;
                    _state = state_qtitle;
                }
                else
                {
                    std::ostringstream msg;
                    msg << "unexpected quote character within csv title of column "
                        << _titles.size();
                    SerializationError::doThrow(msg.str());
                }
            }
            else
            {
                _titles.back() += ch.narrow();
            }
            break;

        // inside a quoted title
        case state_qtitle:
            if (ch == _quote)
            {
                _state = state_qtitlep;
            }
            else
            {
                _titles.back() += ch.narrow();
            }
            break;

        // after the closing quote of a title
        case state_qtitlep:
            if (ch == L'\n' || ch == L'\r')
            {
                log_debug("title=\"" << _titles.back() << '"');
                _state = (ch == L'\r' ? state_cr : state_rowstart);
                _noColumns = _titles.size();
            }
            else if (ch == _delimiter)
            {
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
                _state = state_title;
            }
            else
            {
                std::ostringstream msg;
                msg << "invalid character '" << ch.narrow()
                    << "' within csv title of column " << _titles.size();
                SerializationError::doThrow(msg.str());
            }
            break;

        // a '\r' ended the previous line; swallow the '\n' of a "\r\n" pair
        case state_cr:
            _state = state_rowstart;
            if (ch == L'\n')
            {
                break;
            }
            // fallthrough: ch is already the first character of the next row

        // first character of a data row: open the row
        case state_rowstart:
            _column = 0;
            log_debug("new row");
            _deserializer->beginMember(std::string(), std::string(), SerializationInfo::Array);
            _state = state_datastart;
            // no break: ch is also the first character of the first data item

        // start of a data item whose member has not been opened yet
        case state_datastart:
            log_debug("member \"" << (_column < _titles.size() ? _titles[_column] : std::string()) << '"');
            _deserializer->beginMember(
                _column < _titles.size() ? _titles[_column] : std::string(),
                std::string(), SerializationInfo::Value);

            if (ch == L'\n' || ch == L'\r')
            {
                _deserializer->leaveMember();  // leave data item
                checkNoColumns(_column, _noColumns, _lineNo);
                _deserializer->leaveMember();  // leave row
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == L'"' || ch == L'\'')
            {
                _quote = ch;
                _state = state_qdata;
            }
            else if (ch == _delimiter)
            {
                // empty item; the state is kept so that the next character
                // opens the member of the next column
                ++_column;
                _deserializer->leaveMember();
            }
            else
            {
                _value += ch;
                _state = state_data;
            }
            break;

        // start of a data item whose member is already open (after a delimiter)
        case state_data0:
            if (ch == L'"' || ch == L'\'')
            {
                _quote = ch;
                _state = state_qdata;
                break;
            }
            // fallthrough: not quoted, treat ch as ordinary data

        // inside an unquoted data item; its member is open
        case state_data:
            if (ch == L'\n' || ch == L'\r')
            {
                log_debug("value \"" << _value << '"');
                _deserializer->setValue(_value);
                _value.clear();
                checkNoColumns(_column, _noColumns, _lineNo);
                _deserializer->leaveMember();  // leave data item
                _deserializer->leaveMember();  // leave row
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == _delimiter)
            {
                log_debug("value \"" << _value << '"');
                _deserializer->setValue(_value);
                _value.clear();
                _deserializer->leaveMember();  // leave data item
                ++_column;
                log_debug("member \"" << (_column < _titles.size() ? _titles[_column] : std::string()) << '"');
                _deserializer->beginMember(
                    _column < _titles.size() ? _titles[_column] : std::string(),
                    std::string(), SerializationInfo::Value);
                _state = state_data0;
            }
            else
            {
                _value += ch;
            }
            break;

        // inside a quoted data item; the closing quote completes the item
        case state_qdata:
            if (ch == _quote)
            {
                log_debug("value \"" << _value << '"');
                _deserializer->setValue(_value);
                _value.clear();
                _deserializer->leaveMember();  // leave data item
                _state = state_qdata_end;
            }
            else
            {
                _value += ch;
            }
            break;

        // after the closing quote of a data item; only the row is still open
        case state_qdata_end:
            if (ch == L'\n' || ch == L'\r')
            {
                checkNoColumns(_column, _noColumns, _lineNo);
                _deserializer->leaveMember();  // leave row
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == _delimiter)
            {
                ++_column;
                log_debug("member \"" << (_column < _titles.size() ? _titles[_column] : std::string()) << '"');
                _deserializer->beginMember(
                    _column < _titles.size() ? _titles[_column] : std::string(),
                    std::string(), SerializationInfo::Value);
                _state = state_data0;
            }
            else
            {
                // The quoted value has already been passed to the deserializer
                // and its member has been left (see state_qdata), so it cannot
                // be extended. Switching to state_data here would call
                // setValue()/leaveMember() once more and thereby close the row
                // instead of a data item, leaving the deserializer unbalanced.
                // Reject the input instead, as state_qtitlep does for titles.
                std::ostringstream msg;
                msg << "invalid character '" << ch.narrow()
                    << "' after quoted csv value in column " << (_column + 1)
                    << " of line " << _lineNo;
                SerializationError::doThrow(msg.str());
            }
            break;
    }

    //log_debug("ch=" << ch.narrow() << " _state=" << _state);
}