void end_parse() { switch (state_) { case states::unquoted_string: before_record(); end_unquoted_string_value(); after_field(); break; case states::escaped_value: if (parameters_.quote_escape_char() == parameters_.quote_char()) { before_record(); end_quoted_string_value(); after_field(); } break; } if (column_index_ > 0) { after_record(); } switch (stack_[top_]) { case modes::array: if (!pop(modes::array)) { err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this); } break; case modes::object: if (!pop(modes::object)) { err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this); } break; case modes::header: if (!pop(modes::header)) { err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this); } break; } handler_->end_array(*this); if (!pop(modes::done)) { err_handler_->error(std::error_code(csv_parser_errc::unexpected_eof, csv_error_category()), *this); } handler_->end_json(); }
void end_parse(std::error_code& ec) { switch (state_) { case csv_state_type::unquoted_string: if (parameters_.trim_leading() || parameters_.trim_trailing()) { trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing()); } if (!parameters_.ignore_empty_lines() || (column_index_ > 0 || value_buffer_.length() > 0)) { if (column_index_ == 0) { before_record(); } if (stack_[top_] != csv_mode_type::subfields) { before_field(); } end_unquoted_string_value(); after_field(); } break; case csv_state_type::escaped_value: if (parameters_.quote_escape_char() == parameters_.quote_char()) { if (column_index_ == 0) { before_record(); } if (stack_[top_] != csv_mode_type::subfields) { before_field(); } end_quoted_string_value(ec); if (ec) return; after_field(); } break; default: break; } if (column_index_ > 0) { after_record(); } switch (stack_[top_]) { case csv_mode_type::header: pop_mode(csv_mode_type::header); break; case csv_mode_type::data: pop_mode(csv_mode_type::data); break; default: break; } if (parameters_.mapping() == mapping_type::m_columns) { basic_json_fragment_filter<CharT> fragment_filter(handler_); handler_.begin_object(*this); for (size_t i = 0; i < column_names_.size(); ++i) { handler_.name(column_names_[i],*this); decoders_[i].end_array(*this); decoders_[i].end_json(); decoders_[i].get_result().dump_fragment(fragment_filter); } handler_.end_object(*this); } else { handler_.end_array(*this); } if (!pop_mode(csv_mode_type::initial)) { err_handler_.fatal_error(csv_parser_errc::unexpected_eof, *this); ec = csv_parser_errc::unexpected_eof; return; } handler_.end_json(); }
/// Feeds a range of characters through the CSV state machine.
///
/// Processing stops when the range is exhausted or state_ becomes
/// states::done (set once line_ exceeds parameters_.max_lines()).
///
/// @param p       Buffer holding the input characters.
/// @param start   Index in `p` of the first character to process.
/// @param length  Index one past the last character to process; the loop runs
///                while index_ < length, so this is an end index into `p`,
///                not a count relative to `start`.
///
/// An unknown state is reported to the error handler as
/// csv_parser_errc::invalid_state; processing then continues with the next
/// character.
void parse(Char const* p, size_t start, size_t length)
{
    index_ = start;
    for (; index_ < length && state_ != states::done; ++index_)
    {
        curr_char_ = p[index_];
    all_states:
        switch (state_)
        {
        case states::comment:
            if (curr_char_ == '\n')
            {
                state_ = states::expect_value;
            }
            else if (prev_char_ == '\r')
            {
                // A lone CR ended the comment line; re-examine this character
                // as the start of a value.
                state_ = states::expect_value;
                goto all_states;
            }
            break;

        case states::expect_value:
            if (column_ == 1 && curr_char_ == parameters_.comment_starter())
            {
                state_ = states::comment;
            }
            else
            {
                // Every value starts out as unquoted; re-dispatch the same
                // character in that state.
                state_ = states::unquoted_string;
                goto all_states;
            }
            break;

        case states::between_fields:
            if (curr_char_ == '\r' || (prev_char_ != '\r' && curr_char_ == '\n'))
            {
                after_record();
                state_ = states::expect_value;
            }
            else if (curr_char_ == parameters_.field_delimiter())
            {
                state_ = states::expect_value;
            }
            break;

        case states::escaped_value:
            if (curr_char_ == parameters_.quote_char())
            {
                // Escaped quote: keep it as part of the value.
                string_buffer_.push_back(curr_char_);
                state_ = states::quoted_string;
            }
            else if (parameters_.quote_escape_char() == parameters_.quote_char())
            {
                // The previous quote was the closing quote after all; finish
                // the value and re-dispatch this character between fields.
                before_record();
                end_quoted_string_value();
                after_field();
                state_ = states::between_fields;
                goto all_states;
            }
            break;

        case states::quoted_string:
            if (curr_char_ == parameters_.quote_escape_char())
            {
                state_ = states::escaped_value;
            }
            else if (curr_char_ == parameters_.quote_char())
            {
                before_record();
                end_quoted_string_value();
                after_field();
                state_ = states::between_fields;
            }
            else
            {
                string_buffer_.push_back(curr_char_);
            }
            break;

        case states::unquoted_string:
            if (prev_char_ == '\r' && curr_char_ == '\n')
            {
                // LF of a CRLF pair: the record was already ended at the CR.
                // (The original nested a second, unreachable record-ending
                // branch here; it has been removed.)
            }
            else if (curr_char_ == '\r' || curr_char_ == '\n')
            {
                before_record();
                end_unquoted_string_value();
                after_field();
                after_record();
                state_ = states::expect_value;
            }
            else if (curr_char_ == parameters_.field_delimiter())
            {
                before_record();
                end_unquoted_string_value();
                after_field();
                state_ = states::expect_value;
            }
            else if (curr_char_ == parameters_.quote_char())
            {
                // Opening quote: discard whatever was buffered before it.
                string_buffer_.clear();
                state_ = states::quoted_string;
            }
            else
            {
                string_buffer_.push_back(curr_char_);
            }
            break;

        default:
            err_handler_->error(std::error_code(csv_parser_errc::invalid_state, csv_error_category()), *this);
            break;
        }

        // Checked before line_ is advanced for the current character.
        if (line_ > parameters_.max_lines())
        {
            state_ = states::done;
        }

        // Line/column bookkeeping: CR and a standalone LF each start a new
        // line; the LF of a CRLF pair only resets the column.
        switch (curr_char_)
        {
        case '\r':
            ++line_;
            column_ = 1;
            break;
        case '\n':
            if (prev_char_ != '\r')
            {
                ++line_;
            }
            column_ = 1;
            break;
        default:
            ++column_;
            break;
        }
        prev_char_ = curr_char_;
    }
}
void parse(const CharT* p, size_t start, size_t length, std::error_code& ec) { index_ = start; for (; index_ < length && state_ != csv_state_type::done; ++index_) { curr_char_ = p[index_]; all_csv_states: switch (state_) { case csv_state_type::comment: if (curr_char_ == '\n') { state_ = csv_state_type::expect_value; } else if (prev_char_ == '\r') { state_ = csv_state_type::expect_value; goto all_csv_states; } break; case csv_state_type::expect_value: if (column_ == 1 && curr_char_ == parameters_.comment_starter()) { state_ = csv_state_type::comment; } else { state_ = csv_state_type::unquoted_string; goto all_csv_states; } break; case csv_state_type::escaped_value: { if (curr_char_ == parameters_.quote_char()) { value_buffer_.push_back(static_cast<CharT>(curr_char_)); state_ = csv_state_type::quoted_string; } else if (parameters_.quote_escape_char() == parameters_.quote_char()) { state_ = csv_state_type::between_fields; goto all_csv_states; } } break; case csv_state_type::quoted_string: { if (curr_char_ == parameters_.quote_escape_char()) { state_ = csv_state_type::escaped_value; } else if (curr_char_ == parameters_.quote_char()) { state_ = csv_state_type::between_fields; } else { value_buffer_.push_back(static_cast<CharT>(curr_char_)); } } break; case csv_state_type::between_fields: if (prev_char_ == '\r' && curr_char_ == '\n') { } else if (curr_char_ == '\r' || curr_char_ == '\n') { if (parameters_.trim_leading() || parameters_.trim_trailing()) { trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing()); } if (!parameters_.ignore_empty_lines() || (column_index_ > 0 || value_buffer_.length() > 0)) { if (column_index_ == 0) { before_record(); } if (stack_[top_] != csv_mode_type::subfields) { before_field(); } end_quoted_string_value(ec); if (ec) return; after_field(); after_record(); } state_ = csv_state_type::expect_value; } else if (curr_char_ == parameters_.field_delimiter() || (parameters_.subfield_delimiter().second && curr_char_ == 
parameters_.subfield_delimiter().first)) { if (column_index_ == 0 && stack_[top_] != csv_mode_type::subfields) { before_record(); } if (parameters_.trim_leading() || parameters_.trim_trailing()) { trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing()); } if (stack_[top_] != csv_mode_type::subfields) { before_field(); if (parameters_.subfield_delimiter().second && curr_char_ == parameters_.subfield_delimiter().first) { before_multi_valued_field(); } } end_quoted_string_value(ec); if (ec) return; if (curr_char_ == parameters_.field_delimiter()) { after_field(); } state_ = csv_state_type::unquoted_string; } break; case csv_state_type::unquoted_string: { if (prev_char_ == '\r' && curr_char_ == '\n') { } else if (curr_char_ == '\r' || curr_char_ == '\n') { if (parameters_.trim_leading() || parameters_.trim_trailing()) { trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing()); } if (!parameters_.ignore_empty_lines() || (column_index_ > 0 || value_buffer_.length() > 0)) { if (column_index_ == 0) { before_record(); } if (stack_[top_] != csv_mode_type::subfields) { before_field(); } end_unquoted_string_value(); after_field(); after_record(); } state_ = csv_state_type::expect_value; } else if (curr_char_ == parameters_.field_delimiter() || (parameters_.subfield_delimiter().second && curr_char_ == parameters_.subfield_delimiter().first)) { if (column_index_ == 0 && stack_[top_] != csv_mode_type::subfields) { before_record(); } if (parameters_.trim_leading() || parameters_.trim_trailing()) { trim_string_buffer(parameters_.trim_leading(),parameters_.trim_trailing()); } if (stack_[top_] != csv_mode_type::subfields) { before_field(); if (parameters_.subfield_delimiter().second && curr_char_ == parameters_.subfield_delimiter().first) { before_multi_valued_field(); } } end_unquoted_string_value(); if (curr_char_ == parameters_.field_delimiter()) { after_field(); } state_ = csv_state_type::unquoted_string; } else if (curr_char_ == 
parameters_.quote_char()) { value_buffer_.clear(); state_ = csv_state_type::quoted_string; } else { value_buffer_.push_back(static_cast<CharT>(curr_char_)); } } break; default: err_handler_.fatal_error(csv_parser_errc::invalid_state, *this); ec = csv_parser_errc::invalid_state; return; } if (line_ > parameters_.max_lines()) { state_ = csv_state_type::done; } switch (curr_char_) { case '\r': ++line_; column_ = 1; break; case '\n': if (prev_char_ != '\r') { ++line_; } column_ = 1; break; default: ++column_; break; } prev_char_ = curr_char_; } }