Exemplo n.º 1
0
/**
 * Consumes one character of a JSON string body and appends the decoded
 * result to _str.
 *
 * Handles plain characters, single-character backslash escapes and
 * 4-digit \u hex escapes.
 *
 * @param ch  next input character
 * @return    true when ch is the unescaped closing '"', false otherwise
 *
 * Invalid escape characters and invalid hex digits are reported through
 * _jsonParser->doThrow().
 */
bool JsonParser::JsonStringParser::advance(Char ch)
{
    // Plain text: watch for the closing quote or the start of an escape.
    if (_state == state_0)
    {
        if (ch == '"')
            return true;

        if (ch == '\\')
            _state = state_esc;
        else
            _str += ch;

        return false;
    }

    // Character directly after a backslash.
    if (_state == state_esc)
    {
        // Go back to plain text unless a \u sequence starts below.
        _state = state_0;

        if (ch == 'u')
        {
            // Collect the next four hex digits into _value.
            _state = state_hex;
            _count = 4;
            _value = 0;
        }
        else if (ch == 't')
            _str += '\t';
        else if (ch == 'r')
            _str += '\r';
        else if (ch == 'n')
            _str += '\n';
        else if (ch == 'f')
            _str += '\f';
        else if (ch == 'b')
            _str += '\b';
        else if (ch == '/' || ch == '\\' || ch == '"')
            _str += ch;     // these escapes stand for themselves
        else
            _jsonParser->doThrow(std::string("invalid character '") + ch.narrow() + "' in string");

        return false;
    }

    // One hex digit of a \uXXXX sequence.
    if (_state == state_hex)
    {
        if (ch >= 'A' && ch <= 'F')
            _value = (_value << 4) | (ch.value() - 'A' + 10);
        else if (ch >= 'a' && ch <= 'f')
            _value = (_value << 4) | (ch.value() - 'a' + 10);
        else if (ch >= '0' && ch <= '9')
            _value = (_value << 4) | (ch.value() - '0');
        else
            _jsonParser->doThrow(std::string("invalid character '") + ch.narrow() + "' in hex sequence");

        --_count;
        if (_count == 0)
        {
            // All four digits seen: emit the code unit and resume plain text.
            _str += Char(static_cast<wchar_t>(_value));
            _state = state_0;
        }
    }

    return false;
}
Exemplo n.º 2
0
/**
 * Feeds one character into the JSON parser state machine.
 *
 * Return value, as used by the code below:
 *    0  the character was consumed; the current value is not finished yet
 *    1  the character was consumed and it completed the current value
 *   -1  the current value ended *before* this character; the character was
 *       not consumed and the caller has to process it itself (see the
 *       fall-throughs in state_object_value and state_array_value)
 *
 * Nested values (object members, array elements) are parsed by _next, a
 * JsonParser that is created on first use and re-armed with begin().
 *
 * A JsonParserError raised while processing the character gets the current
 * line number attached before it is rethrown.
 */
int JsonParser::advance(Char ch)
{
    int ret;

    // Line counter, used only for error reporting (see the catch block).
    if (ch == '\n')
      ++_lineNo;

    try
    {
        switch (_state)
        {
            // Start of a value: the first significant character selects the kind.
            case state_0:
                if (ch == '{')
                {
                    _state = state_object;
                    _deserializer->setCategory(SerializationInfo::Object);
                }
                else if (ch == '[')
                {
                    _state = state_array;
                    _deserializer->setCategory(SerializationInfo::Array);
                }
                else if (ch == '"')
                {
                    _state = state_string;
                    _deserializer->setCategory(SerializationInfo::Value);
                }
                else if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-')
                {
                    _token = ch;
                    _state = state_number;
                    _deserializer->setCategory(SerializationInfo::Value);
                }
                else if (ch == '/')
                {
                    // Possible comment; remember where to resume afterwards.
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                {
                    // Anything else starts a bare token (checked in state_token).
                    _token = ch;
                    _state = state_token;
                }
                break;

            // Directly after '{': expect a member name, '}' or a comment.
            case state_object:
                if (ch == '"')
                {
                    _state = state_object_name;
                    _stringParser.clear();
                }
                else if (ch == '}')
                    return 1;
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (std::isalpha(ch.value()))
                {
                    // Unquoted member name.
                    _token = ch;
                    _state = state_object_plainname;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // Inside an unquoted member name (alphanumeric characters only).
            case state_object_plainname:
                if (std::isalnum(ch.value()))
                    _token += ch;
                else if (std::isspace(ch.value()))
                {
                    // Name finished; the ':' is still to come.
                    _stringParser.str(_token);
                    _state = state_object_after_name;
                }
                else if (ch == ':')
                {
                    // Name finished and ':' seen at once: same steps as the
                    // ':' branch of state_object_after_name.
                    _stringParser.str(_token);
                    if (_next == 0)
                        _next = new JsonParser();
                    log_debug("begin object member " << _stringParser.str());
                    _deserializer->beginMember(Utf8Codec::encode(_stringParser.str()),
                            std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    _stringParser.clear();
                    _state = state_object_value;
                }
                else
                    throwInvalidCharacter(ch);

                break;

            // Inside a quoted member name; _stringParser reports the closing quote.
            case state_object_name:
                if (_stringParser.advance(ch))
                    _state = state_object_after_name;
                break;

            // Member name complete: expect ':' (whitespace and comments allowed).
            case state_object_after_name:
                if (ch == ':')
                {
                    // Create the nested parser on first use, then hand the
                    // member value over to it.
                    if (_next == 0)
                        _next = new JsonParser();
                    log_debug("begin object member " << _stringParser.str());
                    _deserializer->beginMember(Utf8Codec::encode(_stringParser.str()),
                            std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    _stringParser.clear();
                    _state = state_object_value;
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // Member value: delegated to the nested parser.
            case state_object_value:
                ret = _next->advance(ch);

                if (ret != 0)
                {
                    // Nested value is complete (ret is 1 or -1).
                    log_debug("leave member");
                    _deserializer->leaveMember();
                    _state = state_object_e;
                }

                if (ret != -1)
                    break;

                // fallthrough: ret == -1 means the nested parser did not
                // consume ch, so it is handled right away as the character
                // following the value.

            // After a member value: expect ',', '}' or a comment.
            case state_object_e:
                if (ch == ',')
                    _state = state_object_next_member;
                else if (ch == '}')
                    return 1;
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // After ',': like state_object, except that '}' is not accepted here.
            case state_object_next_member:
                if (ch == '"')
                {
                    _state = state_object_name;
                    _stringParser.clear();
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (std::isalpha(ch.value()))
                {
                    _token = ch;
                    _state = state_object_plainname;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // Directly after '[': expect ']', a comment or the first element.
            case state_array:
                if (ch == ']')
                {
                    return 1;
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                {
                    if (_next == 0)
                        _next = new JsonParser();

                    log_debug("begin array member");
                    _deserializer->beginMember(std::string(),
                            std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    // ch is already the first character of the element; the
                    // return value of this first advance() is ignored.
                    _next->advance(ch);
                    _state = state_array_value;
                }
                break;

            // Array element: delegated to the nested parser.
            case state_array_value:
                ret = _next->advance(ch);
                // Unlike for objects, leaveMember() is not called here but in
                // state_array_e when ',' or ']' is seen.
                if (ret != 0)
                    _state = state_array_e;
                if (ret != -1)
                    break;

                // fallthrough: ret == -1 means ch was not consumed by the
                // nested parser and is handled as the character after the
                // element.

            // After an array element: expect ']', ',' or a comment.
            case state_array_e:
                if (ch == ']')
                {
                    log_debug("leave member");
                    _deserializer->leaveMember();
                    return 1;
                }
                else if (ch == ',')
                {
                    // Close the previous element and open the next one.
                    log_debug("leave member");
                    _deserializer->leaveMember();

                    log_debug("begin array member");
                    _deserializer->beginMember(std::string(),
                            std::string(), SerializationInfo::Void);
                    _next->begin(*_deserializer);
                    _state = state_array_value;
                }
                else if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    throwInvalidCharacter(ch);
                break;

            // Inside a string value; finished when _stringParser sees the closing quote.
            case state_string:
                if (_stringParser.advance(ch))
                {
                    log_debug("set string value \"" << _stringParser.str() << '"');
                    _deserializer->setValue(_stringParser.str());
                    _deserializer->setTypeName("string");
                    _stringParser.clear();
                    _state = state_end;
                    return 1;
                }
                break;

            // Integer part of a number; the raw text is collected in _token.
            case state_number:
                if (std::isspace(ch.value()))
                {
                    // Whitespace terminates the number and is consumed.
                    log_debug("set int value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("int");
                    _token.clear();
                    return 1;
                }
                else if (ch == '.' || ch == 'e' || ch == 'E')
                {
                    // Fraction or exponent: from here on it is a floating point value.
                    _token += ch;
                    _state = state_float;
                }
                else if (ch >= '0' && ch <= '9')
                {
                    _token += ch;
                }
                else
                {
                    // Any other character ends the number but is not consumed.
                    log_debug("set int value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("int");
                    _token.clear();
                    return -1;
                }
                break;

            // Floating point number; the accepted characters are appended to
            // _token as they come, without checking their order here.
            case state_float:
                if (std::isspace(ch.value()))
                {
                    log_debug("set double value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("double");
                    _token.clear();
                    return 1;
                }
                else if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-'
                        || ch == '.' || ch == 'e' || ch == 'E')
                    _token += ch;
                else
                {
                    // Terminating character is not consumed.
                    log_debug("set double value \"" << _token << '"');
                    _deserializer->setValue(_token);
                    _deserializer->setTypeName("double");
                    _token.clear();
                    return -1;
                }
                break;

            // Bare token; the letters following the first character are lower-cased.
            case state_token:
                if (std::isalpha(ch.value()))
                    _token += Char(std::tolower(ch));
                else
                {
                    if (_token == "true" || _token == "false")
                    {
                        log_debug("set bool value \"" << _token << '"');
                        _deserializer->setValue(_token);
                        _deserializer->setTypeName("bool");
                        _token.clear();
                    }
                    else if (_token == "null")
                    {
                        log_debug("set null value \"" << _token << '"');
                        _deserializer->setTypeName("null");
                        _deserializer->setNull();
                        _token.clear();
                    }

                    // Any other token sets nothing on the deserializer and
                    // _token is left as it is; -1 is returned in every case
                    // because ch is not part of the token.
                    return -1;
                }

                break;

            // A '/' was seen: the next character decides the comment style.
            case state_comment0:
                if (ch == '/')
                    _state = state_commentline;
                else if (ch == '*')
                    _state = state_comment;
                else
                    throwInvalidCharacter(ch);
                break;

            // Line comment: runs up to the end of the line.
            case state_commentline:
                if (ch == '\n')
                    _state = _nextState;
                break;

            // Block comment: look for the '*' of the closing "*/".
            case state_comment:
                if (ch == '*')
                    _state = state_comment_e;
                break;

            // '*' seen inside a block comment: '/' closes it, a further '*'
            // keeps waiting, anything else returns to the comment body.
            case state_comment_e:
                if (ch == '/')
                    _state = _nextState;
                else if (ch != '*')
                    _state = state_comment;
                break;

            // After a completed string value: only whitespace and comments are accepted.
            case state_end:
                if (ch == '/')
                {
                    _nextState = _state;
                    _state = state_comment0;
                }
                else if (!std::isspace(ch.value()))
                    doThrow(std::string("unexpected character '") + ch.narrow() + "\' after end");
                break;
        }
    }
    catch (JsonParserError& e)
    {
        // Attach the current line number, then rethrow the same exception object.
        e._lineNo = _lineNo;
        throw;
    }

    // Character consumed, value not complete yet.
    return 0;
}
Exemplo n.º 3
0
/**
 * Reports ch as an invalid character by passing the message
 * "invalid character '<ch>'" to doThrow().
 *
 * @param ch  the offending input character
 */
void JsonParser::throwInvalidCharacter(Char ch)
{
    std::string message("invalid character '");
    message += ch.narrow();
    message += '\'';

    doThrow(message);
}
Exemplo n.º 4
0
/**
 * Feeds one character into the CSV parser state machine.
 *
 * The state_detectDelim* and state_*title* states read a title line and
 * store the column names in _titles. While doing so the delimiter is
 * detected: the first character outside a quoted title that is not
 * alphanumeric, '_', ' ', a quote or a line end becomes _delimiter.
 *
 * The remaining states read data rows and report them to _deserializer:
 * one beginMember(..., SerializationInfo::Array) per row and one
 * beginMember(title, ..., SerializationInfo::Value) per column, each closed
 * with leaveMember().
 *
 * Line ends may be "\n", "\r" or "\r\n" (see state_cr).
 *
 * NOTE(review): the code appends to _titles.back() from the very first
 * character, so _titles presumably holds one empty entry before parsing
 * starts - confirm against the code that initialises the parser.
 */
void CsvParser::advance(Char ch)
{
    // Line counter; passed to checkNoColumns() below.
    if (ch == L'\n')
        ++_lineNo;

    switch (_state)
    {
        // First title, delimiter not known yet.
        case state_detectDelim:
            if (isalnum(ch) || ch == L'_' || ch == L' ')
            {
                _titles.back() += ch.narrow();
            }
            else if (ch == L'\n' || ch == L'\r')
            {
                // Line ended without a delimiter: a single column.
                log_debug("title=\"" << _titles.back() << '"');
                _noColumns = 1;
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == L'\'' || ch == L'"')
            {
                // Quoted first title; remember which quote character closes it.
                _quote = ch;
                _state = state_detectDelim_q;
            }
            else
            {
                // Any other character is taken as the delimiter.
                _delimiter = ch;
                log_debug("delimiter=" << _delimiter.narrow());
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
                _state = state_title;
            }
            break;

        // Inside the quoted first title: everything up to the matching quote
        // belongs to the title.
        case state_detectDelim_q:
            if (ch == _quote)
            {
                _state = state_detectDelim_postq;
            }
            else
            {
                _titles.back() += ch.narrow();
            }
            break;

        // After the closing quote of the first title: only a line end or the
        // (still unknown) delimiter may follow.
        case state_detectDelim_postq:
            if (isalnum(ch) || ch == L'_' || ch == L'\'' || ch == L'"' || ch == L' ')
            {
                std::ostringstream msg;
                msg << "invalid character '" << ch.narrow() << "' within csv title of column " << _titles.size();
                SerializationError::doThrow(msg.str());
            }
            else if (ch == L'\n' || ch == L'\r')
            {
                log_debug("title=\"" << _titles.back() << '"');
                _noColumns = 1;
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else
            {
                _delimiter = ch;
                log_debug("delimiter=" << _delimiter.narrow());
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
                _state = state_title;
            }
            break;

        // Unquoted title, delimiter known.
        case state_title:
            if (ch == L'\n' || ch == L'\r')
            {
                // End of the title line: the number of titles is the column count.
                log_debug("title=\"" << _titles.back() << '"');
                _state = (ch == L'\r' ? state_cr : state_rowstart);
                _noColumns = _titles.size();
            }
            else if (ch == _delimiter)
            {
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
            }
            else if (ch == L'\'' || ch == L'\"')
            {
                // A quote is only accepted as the first character of a title.
                if (_titles.back().empty())
                {
                    _quote = ch;
                    _state = state_qtitle;
                }
                else
                {
                    std::ostringstream msg;
                    msg << "unexpected quote character within csv title of column " << _titles.size();
                    SerializationError::doThrow(msg.str());
                }
            }
            else
            {
                _titles.back() += ch.narrow();
            }
            break;

        // Inside a quoted title.
        case state_qtitle:
            if (ch == _quote)
            {
                _state = state_qtitlep;
            }
            else
            {
                _titles.back() += ch.narrow();
            }
            break;

        // After the closing quote of a title: a line end or the delimiter must follow.
        case state_qtitlep:
            if (ch == L'\n' || ch == L'\r')
            {
                log_debug("title=\"" << _titles.back() << '"');
                _state = (ch == L'\r' ? state_cr : state_rowstart);
                _noColumns = _titles.size();
            }
            else if (ch == _delimiter)
            {
                log_debug("title=\"" << _titles.back() << '"');
                _titles.push_back(std::string());
                _state = state_title;
            }
            else
            {
                std::ostringstream msg;
                msg << "invalid character '" << ch.narrow() << "' within csv title of column " << _titles.size();
                SerializationError::doThrow(msg.str());
            }
            break;

        // A '\r' ended the previous line: swallow a directly following '\n'
        // so that "\r\n" counts as one line end.
        case state_cr:
            _state = state_rowstart;
            if (ch == L'\n')
            {
                break;
            }
            // fallthrough: ch is already the first character of the next row

        // First character of a data row: open the row member.
        case state_rowstart:
            _column = 0;
            log_debug("new row");
            _deserializer->beginMember(std::string(),
                std::string(), SerializationInfo::Array);
            _state = state_datastart;
            // no break: the same character also starts the first column

        // Start of a column: open the data member, named after the column's
        // title when one exists, otherwise with an empty name.
        case state_datastart:
            log_debug("member \""
                << (_column < _titles.size() ? _titles[_column] : std::string()) << '"');
            _deserializer->beginMember(
                _column < _titles.size() ? _titles[_column] : std::string(),
                std::string(), SerializationInfo::Value);

            if (ch == L'\n' || ch == L'\r')
            {
                // The line ends here: close the data member without setting a
                // value, then close the row.
                // checkNoColumns() is defined elsewhere; presumably it
                // validates _column against _noColumns - confirm.
                _deserializer->leaveMember();
                checkNoColumns(_column, _noColumns, _lineNo);
                _deserializer->leaveMember();
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == L'"' || ch == L'\'')
            {
                _quote = ch;
                _state = state_qdata;
            }
            else if (ch == _delimiter)
            {
                // Empty column: close it and stay in state_datastart, so the
                // next character opens the member of the following column.
                ++_column;
                _deserializer->leaveMember();
            }
            else
            {
                _value += ch;
                _state = state_data;
            }
            break;

        // First character after a delimiter; the data member of this column
        // has already been opened by the state that saw the delimiter.
        case state_data0:
            if (ch == L'"' || ch == L'\'')
            {
                _quote = ch;
                _state = state_qdata;
                break;
            }
            // no break: anything but a quote is handled as ordinary data

        // Inside an unquoted value.
        case state_data:
            if (ch == L'\n' || ch == L'\r')
            {
                log_debug("value \"" << _value << '"');
                _deserializer->setValue(_value);
                _value.clear();
                checkNoColumns(_column, _noColumns, _lineNo);
                _deserializer->leaveMember();  // leave data item
                _deserializer->leaveMember();  // leave row
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == _delimiter)
            {
                log_debug("value \"" << _value << '"');
                _deserializer->setValue(_value);
                _value.clear();
                _deserializer->leaveMember();  // leave data item
                ++_column;
                // Open the member for the next column right away.
                log_debug("member \""
                    << (_column < _titles.size() ? _titles[_column] : std::string()) << '"');
                _deserializer->beginMember(
                    _column < _titles.size() ? _titles[_column] : std::string(),
                    std::string(), SerializationInfo::Value);
                _state = state_data0;
            }
            else
            {
                _value  += ch;
            }
            break;

        // Inside a quoted value: everything up to the matching quote,
        // including delimiters and line ends, is data.
        case state_qdata:
            if (ch == _quote)
            {
                log_debug("value \"" << _value << '"');
                _deserializer->setValue(_value);
                _value.clear();
                _deserializer->leaveMember();  // leave data item
                _state = state_qdata_end;
            }
            else
            {
                _value += ch;
            }
            break;

        // After the closing quote of a value.
        case state_qdata_end:
            if (ch == L'\n' || ch == L'\r')
            {
                checkNoColumns(_column, _noColumns, _lineNo);
                _deserializer->leaveMember();  // leave row
                _state = (ch == L'\r' ? state_cr : state_rowstart);
            }
            else if (ch == _delimiter)
            {
                ++_column;
                log_debug("member \""
                    << (_column < _titles.size() ? _titles[_column] : std::string()) << '"');
                _deserializer->beginMember(
                    _column < _titles.size() ? _titles[_column] : std::string(),
                    std::string(), SerializationInfo::Value);
                _state = state_data0;
            }
            else
            {
                // NOTE(review): state_qdata has already passed the value to
                // setValue(), cleared _value and called leaveMember() for this
                // data item. _value is therefore empty here and the result is
                // just the quote followed by ch, while state_data will later
                // call setValue()/leaveMember() again - confirm that this is
                // the intended handling of text after a closing quote.
                _value = _quote + _value + ch;
                _state = state_data;
            }
            break;
    }

    //log_debug("ch=" << ch.narrow() << " _state=" << _state);
}