Exemple #1
0
 void end_parse()
 {
     switch (state_)
     {
     case states::unquoted_string: 
         before_record();
         end_unquoted_string_value();
         after_field();
         break;
     case states::escaped_value:
         if (parameters_.quote_escape_char() == parameters_.quote_char())
         {
             before_record();
             end_quoted_string_value();
             after_field();
         }
         break;
     }
     if (column_index_ > 0)
     {
         after_record();
     }
     switch (stack_[top_])
     {
     case modes::array:
         if (!pop(modes::array))
         {
             err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this);
         }
         break;
     case modes::object:
         if (!pop(modes::object))
         {
             err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this);
         }
         break;
     case modes::header:
         if (!pop(modes::header))
         {
             err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this);
         }
         break;
     }
     handler_->end_array(*this);
     if (!pop(modes::done))
     {
         err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this);
     }
     handler_->end_json();
 }
Exemple #2
0
 void end_parse(std::error_code& ec)
 {
     switch (state_)
     {
     case csv_state_type::unquoted_string: 
         if (parameters_.trim_leading() || parameters_.trim_trailing())
         {
             trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing());
         }
         if (!parameters_.ignore_empty_lines() || (column_index_ > 0 || value_buffer_.length() > 0))
         {
             if (column_index_ == 0)
             {
                 before_record();
             }
             if (stack_[top_] != csv_mode_type::subfields)
             {
                 before_field();
             }
             end_unquoted_string_value();
             after_field();
         }
         break;
     case csv_state_type::escaped_value:
         if (parameters_.quote_escape_char() == parameters_.quote_char())
         {
             if (column_index_ == 0)
             {
                 before_record();
             }
             if (stack_[top_] != csv_mode_type::subfields)
             {
                 before_field();
             }
             end_quoted_string_value(ec);
             if (ec) return;
             after_field();
         }
         break;
     default:
         break;
     }
     if (column_index_ > 0)
     {
         after_record();
     }
     switch (stack_[top_])
     {
     case csv_mode_type::header:
         pop_mode(csv_mode_type::header);
         break;
     case csv_mode_type::data:
         pop_mode(csv_mode_type::data);
         break;
     default:
         break;
     }
     if (parameters_.mapping() == mapping_type::m_columns)
     {
         basic_json_fragment_filter<CharT> fragment_filter(handler_);
         handler_.begin_object(*this);
         for (size_t i = 0; i < column_names_.size(); ++i)
         {
             handler_.name(column_names_[i],*this);
             decoders_[i].end_array(*this);
             decoders_[i].end_json();
             decoders_[i].get_result().dump_fragment(fragment_filter);
         }
         handler_.end_object(*this);
     }
     else
     {
         handler_.end_array(*this);
     }
     if (!pop_mode(csv_mode_type::initial))
     {
         err_handler_.fatal_error(csv_parser_errc::unexpected_eof, *this);
         ec = csv_parser_errc::unexpected_eof;
         return;
     }
     handler_.end_json();
 }
Exemple #3
0
    // Runs the CSV state machine over the characters p[start] .. p[length - 1].
    //
    // p      - buffer holding the input characters.
    // start  - index of the first character to process.
    // length - exclusive upper bound for the index into p (the loop runs while
    //          index_ < length, so this is an end index, not a count relative
    //          to start).
    //
    // Processing stops early once state_ becomes states::done, which happens
    // when line_ exceeds parameters_.max_lines(). An unknown state is reported
    // to the error handler as csv_parser_errc::invalid_state and the loop
    // continues. `goto all_states` re-dispatches the *same* character after a
    // state change.
    void parse(Char const* p, size_t start, size_t length)
    {
        index_ = start;
        for (; index_ < length && state_ != states::done; ++index_)
        {
            curr_char_ = p[index_];
all_states:
            switch (state_)
            {
            case states::comment:
                if (curr_char_ == '\n')
                {
                    // LF terminates the comment line.
                    state_ = states::expect_value;
                }
                else if (prev_char_ == '\r')
                {
                    // The comment ended on a bare CR; the current character
                    // already belongs to the next line, so re-dispatch it.
                    state_ = states::expect_value;
                    goto all_states;
                }
                break;
            case states::expect_value:
                // A comment is only recognized in the first column of a line.
                if (column_ == 1 && curr_char_ == parameters_.comment_starter())
                {
                    state_ = states::comment;
                }
                else
                {
                    state_ = states::unquoted_string;
                    goto all_states;
                }
                break;
            case states::between_fields:
                // Entered after a quoted value has been closed.
                if (curr_char_ == '\r' || (prev_char_ != '\r' && curr_char_ == '\n'))
                {
                    after_record();
                    state_ = states::expect_value;
                }
                else if (curr_char_ == parameters_.field_delimiter())
                {
                    state_ = states::expect_value;
                }
                break;
            case states::escaped_value:
                {
                    // The previous character was the escape character.
                    if (curr_char_ == parameters_.quote_char())
                    {
                        // Escaped quote: keep it as a literal character.
                        string_buffer_.push_back(curr_char_);
                        state_ = states::quoted_string;
                    }
                    else if (parameters_.quote_escape_char() == parameters_.quote_char())
                    {
                        // Escape and quote are the same character, so the
                        // previous character was really the closing quote.
                        before_record();
                        end_quoted_string_value();
                        after_field();
                        state_ = states::between_fields;
                        goto all_states;
                    }
                    // NOTE(review): with a distinct escape character, any other
                    // character is dropped here and the state is unchanged -
                    // confirm this is intended.
                }
                break;
            case states::quoted_string:
                {
                    // The escape character is tested first, so it wins when it
                    // is the same character as the quote character.
                    if (curr_char_ == parameters_.quote_escape_char())
                    {
                        state_ = states::escaped_value;
                    }
                    else if (curr_char_ == parameters_.quote_char())
                    {
                        before_record();
                        end_quoted_string_value();
                        after_field();
                        state_ = states::between_fields;
                    }
                    else
                    {
                        string_buffer_.push_back(curr_char_);
                    }
                }
                break;
            case states::unquoted_string:
                {
                    if (curr_char_ == '\r' || (prev_char_ != '\r' && curr_char_ == '\n'))
                    {
                        // End of line: finish the field and the record.
                        before_record();
                        end_unquoted_string_value();
                        after_field();
                        after_record();
                        state_ = states::expect_value;
                    }
                    else if (curr_char_ == '\n')
                    {
                        // Only reachable when prev_char_ == '\r': this is the
                        // LF of a CRLF pair whose CR already closed the record,
                        // so it is skipped (and kept out of the buffer).
                    }
                    else if (curr_char_ == parameters_.field_delimiter())
                    {
                        before_record();
                        end_unquoted_string_value();
                        after_field();
                        state_ = states::expect_value;
                    }
                    else if (curr_char_ == parameters_.quote_char())
                    {
                        // Opening quote: discard what was buffered so far and
                        // start collecting a quoted value.
                        string_buffer_.clear();
                        state_ = states::quoted_string;
                    }
                    else
                    {
                        string_buffer_.push_back(curr_char_);
                    }
                }
                break;
            default:
                err_handler_->error(std::error_code(csv_parser_errc::invalid_state, csv_error_category()), *this);
                break;
            }
            // Checked before this character's own line increment is applied.
            if (line_ > parameters_.max_lines())
            {
                state_ = states::done;
            }
            // Line/column bookkeeping: CR always starts a new line, LF only
            // does when it is not the second half of a CRLF pair.
            switch (curr_char_)
            {
            case '\r':
                ++line_;
                column_ = 1;
                break;
            case '\n':
                if (prev_char_ != '\r')
                {
                    ++line_;
                }
                column_ = 1;
                break;
            default:
                ++column_;
                break;
            }
            prev_char_ = curr_char_;
        }
    }
Exemple #4
0
    // Runs the CSV state machine over the characters p[start] .. p[length - 1].
    //
    // p      - buffer holding the input characters.
    // start  - index of the first character to process.
    // length - exclusive upper bound for the index into p (the loop runs while
    //          index_ < length, so this is an end index, not a count relative
    //          to start).
    // ec     - set to csv_parser_errc::invalid_state when state_ holds a value
    //          not handled below; may also be set by end_quoted_string_value(ec).
    //          In either case the function returns immediately, leaving index_
    //          at the character being processed.
    //
    // Processing also stops once state_ becomes csv_state_type::done, which
    // happens when line_ exceeds parameters_.max_lines().
    // `goto all_csv_states` re-dispatches the *same* character after a state
    // change.
    void parse(const CharT* p, size_t start, size_t length, std::error_code& ec)
    {
        index_ = start;
        for (; index_ < length && state_ != csv_state_type::done; ++index_)
        {
            curr_char_ = p[index_];
all_csv_states:
            switch (state_)
            {
            case csv_state_type::comment:
                if (curr_char_ == '\n')
                {
                    // LF terminates the comment line.
                    state_ = csv_state_type::expect_value;
                }
                else if (prev_char_ == '\r')
                {
                    // The comment ended on a bare CR; the current character
                    // already belongs to the next line, so re-dispatch it.
                    state_ = csv_state_type::expect_value;
                    goto all_csv_states;
                }
                break;
            case csv_state_type::expect_value:
                // A comment is only recognized in the first column of a line.
                if (column_ == 1 && curr_char_ == parameters_.comment_starter())
                {
                    state_ = csv_state_type::comment;
                }
                else
                {
                    state_ = csv_state_type::unquoted_string;
                    goto all_csv_states;
                }
                break;
            case csv_state_type::escaped_value: 
                {
                    // The previous character was the escape character.
                    if (curr_char_ == parameters_.quote_char())
                    {
                        // Escaped quote: keep it as a literal character.
                        value_buffer_.push_back(static_cast<CharT>(curr_char_));
                        state_ = csv_state_type::quoted_string;
                    }
                    else if (parameters_.quote_escape_char() == parameters_.quote_char())
                    {
                        // Escape and quote are the same character, so the
                        // previous character was really the closing quote.
                        // The value itself is emitted by between_fields.
                        state_ = csv_state_type::between_fields;
                        goto all_csv_states;
                    }
                    // NOTE(review): with a distinct escape character, any other
                    // character is dropped here and the state is unchanged -
                    // confirm this is intended.
                }
                break;
            case csv_state_type::quoted_string: 
                {
                    // The escape character is tested first, so it wins when it
                    // is the same character as the quote character.
                    if (curr_char_ == parameters_.quote_escape_char())
                    {
                        state_ = csv_state_type::escaped_value;
                    }
                    else if (curr_char_ == parameters_.quote_char())
                    {
                        // Closing quote; the buffered value is emitted later,
                        // when between_fields sees a delimiter or a newline.
                        state_ = csv_state_type::between_fields;
                    }
                    else
                    {
                        value_buffer_.push_back(static_cast<CharT>(curr_char_));
                    }
                }
                break;
            case csv_state_type::between_fields:
                // Entered after a quoted value has been closed. Characters that
                // are neither a newline nor a delimiter match no branch below
                // and are skipped.
                if (prev_char_ == '\r' && curr_char_ == '\n')
                {
                    // LF of a CRLF pair: the CR was already handled.
                }
                else if (curr_char_ == '\r' || curr_char_ == '\n')
                {
                    // End of line: emit the quoted value and close the record.
                    if (parameters_.trim_leading() || parameters_.trim_trailing())
                    {
                        trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing());
                    }
                    // With ignore_empty_lines set, nothing is emitted for a
                    // line that produced no column and has an empty buffer.
                    if (!parameters_.ignore_empty_lines() || (column_index_ > 0 || value_buffer_.length() > 0))
                    {
                        if (column_index_ == 0)
                        {
                            before_record();
                        }
                        if (stack_[top_] != csv_mode_type::subfields)
                        {
                            before_field();
                        }
                        end_quoted_string_value(ec);
                        if (ec) return;
                        after_field();
                        after_record();
                    }
                    state_ = csv_state_type::expect_value;
                }
                // subfield_delimiter() yields a pair: .first is the delimiter
                // character; .second presumably flags whether a subfield
                // delimiter is configured - TODO confirm.
                else if (curr_char_ == parameters_.field_delimiter() || (parameters_.subfield_delimiter().second && curr_char_ == parameters_.subfield_delimiter().first))
                {
                    if (column_index_ == 0 && stack_[top_] != csv_mode_type::subfields)
                    {
                        before_record();
                    }
                    if (parameters_.trim_leading() || parameters_.trim_trailing())
                    {
                        trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing());
                    }
                    if (stack_[top_] != csv_mode_type::subfields)
                    {
                        before_field();
                        // First subfield delimiter seen in this field.
                        if (parameters_.subfield_delimiter().second && curr_char_ == parameters_.subfield_delimiter().first)
                        {
                            before_multi_valued_field();
                        }
                    }
                    end_quoted_string_value(ec);
                    if (ec) return;
                    // Only a field delimiter closes the field; after a subfield
                    // delimiter the field stays open for further subfields.
                    if (curr_char_ == parameters_.field_delimiter())
                    {
                        after_field();
                    }
                    state_ = csv_state_type::unquoted_string;
                }
                break;
            case csv_state_type::unquoted_string: 
                {
                    if (prev_char_ == '\r' && curr_char_ == '\n')
                    {
                        // LF of a CRLF pair: the CR was already handled.
                    }
                    else if (curr_char_ == '\r' || curr_char_ == '\n')
                    {
                        // End of line: emit the value and close the record.
                        if (parameters_.trim_leading() || parameters_.trim_trailing())
                        {
                            trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing());
                        }
                        // With ignore_empty_lines set, nothing is emitted for a
                        // line that produced no column and has an empty buffer.
                        if (!parameters_.ignore_empty_lines() || (column_index_ > 0 || value_buffer_.length() > 0))
                        {
                            if (column_index_ == 0)
                            {
                                before_record();
                            }
                            if (stack_[top_] != csv_mode_type::subfields)
                            {
                                before_field();
                            }
                            end_unquoted_string_value();
                            after_field();
                            after_record();
                        }
                        state_ = csv_state_type::expect_value;
                    }
                    // Field or subfield delimiter (see the note on
                    // subfield_delimiter() in the between_fields case).
                    else if (curr_char_ == parameters_.field_delimiter() || (parameters_.subfield_delimiter().second && curr_char_ == parameters_.subfield_delimiter().first))
                    {
                        if (column_index_ == 0 && stack_[top_] != csv_mode_type::subfields)
                        {
                            before_record();
                        }
                        if (parameters_.trim_leading() || parameters_.trim_trailing())
                        {
                            trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing());
                        }
                        if (stack_[top_] != csv_mode_type::subfields)
                        {
                            before_field();
                            // First subfield delimiter seen in this field.
                            if (parameters_.subfield_delimiter().second && curr_char_ == parameters_.subfield_delimiter().first)
                            {
                                before_multi_valued_field();
                            }
                        }
                        end_unquoted_string_value();
                        // Only a field delimiter closes the field; after a
                        // subfield delimiter the field stays open.
                        if (curr_char_ == parameters_.field_delimiter())
                        {
                            after_field();
                        }
                        state_ = csv_state_type::unquoted_string;
                    }
                    else if (curr_char_ == parameters_.quote_char())
                    {
                        // Opening quote: discard what was buffered so far and
                        // start collecting a quoted value.
                        value_buffer_.clear();
                        state_ = csv_state_type::quoted_string;
                    }
                    else
                    {
                        value_buffer_.push_back(static_cast<CharT>(curr_char_));
                    }
                }
                break;
            default:
                // Unknown state: report it and abort parsing.
                err_handler_.fatal_error(csv_parser_errc::invalid_state, *this);
                ec = csv_parser_errc::invalid_state;
                return;
            }
            // Checked before this character's own line increment is applied.
            if (line_ > parameters_.max_lines())
            {
                state_ = csv_state_type::done;
            }
            // Line/column bookkeeping: CR always starts a new line, LF only
            // does when it is not the second half of a CRLF pair.
            switch (curr_char_)
            {
            case '\r':
                ++line_;
                column_ = 1;
                break;
            case '\n':
                if (prev_char_ != '\r')
                {
                    ++line_;
                }
                column_ = 1;
                break;
            default:
                ++column_;
                break;
            }
            prev_char_ = curr_char_;
        }
    }