static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested)
{
    /** Parses one array written as text: '[', then elements separated by ',' (whitespace around
      * elements is skipped), then ']'. A separator directly before ']' is tolerated.
      * Each element is appended to the nested column by calling read_nested(nested_column);
      * on success a single new offset is appended, i.e. the array column grows by one row.
      * If anything throws, the elements appended so far are removed again and the exception
      * is rethrown, so the nested column is not left with a partially read array.
      * `column` must actually be a ColumnArray (it is downcast with static_cast).
      */
    ColumnArray & column_array = static_cast<ColumnArray &>(column);
    ColumnArray::Offsets & offsets = column_array.getOffsets();
    IColumn & nested_column = column_array.getData();

    /// Number of elements appended to nested_column for the array being read.
    size_t size = 0;
    assertChar('[', istr);

    try
    {
        bool first = true;
        while (!istr.eof() && *istr.position() != ']')
        {
            /// Every element except the first one must be preceded by a comma.
            if (!first)
            {
                if (*istr.position() == ',')
                    ++istr.position();
                else
                    throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
            }

            first = false;

            skipWhitespaceIfAny(istr);

            /// The input may end right after a separator or whitespace. Check for that before
            /// looking at the current character, the same way the loop condition does, and let
            /// the check for the closing bracket below deal with the truncated input.
            if (istr.eof() || *istr.position() == ']')
                break;

            read_nested(nested_column);
            ++size;

            skipWhitespaceIfAny(istr);
        }
        assertChar(']', istr);
    }
    catch (...)
    {
        /// Undo the partially read array before propagating the error.
        if (size)
            nested_column.popBack(size);
        throw;
    }

    /// Offsets are cumulative: the new end is the previous end plus the number of elements read.
    offsets.push_back((offsets.empty() ? 0 : offsets.back()) + size);
}
/** Reads the tuple elements one after another in CSV form, each with the deserializeTextCSV
  * of its own element type. Consecutive elements must be separated by `delimiter`;
  * whitespace on either side of the delimiter is skipped.
  * The reading itself is run through deserializeSafe.
  */
void DataTypeTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const
{
    auto read_all_elements = [&]
    {
        for (size_t idx = 0, count = elems.size(); idx < count; ++idx)
        {
            /// A delimiter is expected before every element except the first one.
            const bool needs_delimiter = idx != 0;
            if (needs_delimiter)
            {
                skipWhitespaceIfAny(istr);
                assertChar(delimiter, istr);
                skipWhitespaceIfAny(istr);
            }

            IColumn & element_column = extractElementColumn(column, idx);
            elems[idx]->deserializeTextCSV(element_column, istr, delimiter);
        }
    };

    deserializeSafe(elems, column, istr, read_all_elements);
}
/** Reads a tuple written as a JSON array: '[', the elements separated by ',', then ']'.
  * Each element is read with the deserializeTextJSON of its own element type.
  * Whitespace is skipped before every element (and before its preceding comma) and before
  * the closing bracket. Only the element reading is run through deserializeSafe; the brackets
  * are checked outside of it.
  */
void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr) const
{
    assertChar('[', istr);

    auto read_all_elements = [&]
    {
        for (size_t idx = 0, count = elems.size(); idx < count; ++idx)
        {
            skipWhitespaceIfAny(istr);

            /// A comma is expected before every element except the first one.
            if (idx > 0)
                assertChar(',', istr);

            IColumn & element_column = extractElementColumn(column, idx);
            elems[idx]->deserializeTextJSON(element_column, istr);
        }
    };

    deserializeSafe(elems, column, istr, read_all_elements);

    skipWhitespaceIfAny(istr);
    assertChar(']', istr);
}
/** Reads one row written as "(v1, v2, ..., vN)" from istr and appends one value to each of `columns`.
  *
  * Typically, this is the usual format for streaming parsing.
  * But as an exception, it also supports processing arbitrary expressions instead of values.
  * This is very inefficient. But if there are no expressions, then there is no overhead.
  *
  * Returns false if, after skipping whitespace, the input is exhausted or the next character is ';'.
  * Returns true once a whole row has been read; a ',' following the row is consumed as well.
  */
bool ValuesRowInputStream::read(MutableColumns & columns)
{
    const size_t num_columns = columns.size();

    skipWhitespaceIfAny(istr);

    if (istr.eof() || *istr.position() == ';')
        return false;

    ParserExpression parser;

    /// What must follow the value of column `idx`: ',' if more values follow, ')' after the last one.
    auto expect_value_terminator = [&](size_t idx)
    {
        skipWhitespaceIfAny(istr);

        if (idx + 1 == num_columns)
            assertChar(')', istr);
        else
            assertChar(',', istr);
    };

    assertChar('(', istr);

    for (size_t i = 0; i < num_columns; ++i)
    {
        skipWhitespaceIfAny(istr);

        /// Remember where the value starts, so that it can be re-read as an expression.
        char * const value_begin = istr.position();
        const size_t bytes_before_value = istr.count() - istr.offset();

        /// Excerpt of the input starting at the value, used in error messages.
        auto value_snippet = [&]
        {
            return String(value_begin, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - value_begin));
        };

        /// Becomes true once the streaming parser has put a value into the column.
        bool value_inserted = false;

        try
        {
            header.getByPosition(i).type->deserializeTextQuoted(*columns[i], istr, format_settings);
            value_inserted = true;

            expect_value_terminator(i);
        }
        catch (const Exception & e)
        {
            if (!format_settings.values.interpret_expressions)
                throw;

            /** The normal streaming parser could not parse the value.
              * Let's try to parse it with a SQL parser as a constant expression.
              * This is an exceptional case.
              */
            const auto code = e.code();
            const bool can_retry_as_expression =
                code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED
                || code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
                || code == ErrorCodes::CANNOT_PARSE_NUMBER
                || code == ErrorCodes::CANNOT_PARSE_DATE
                || code == ErrorCodes::CANNOT_PARSE_DATETIME
                || code == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT;

            if (!can_retry_as_expression)
                throw;

            /// TODO Case when the expression does not fit entirely in the buffer.

            /// If the beginning of the value is no longer in the buffer.
            if (istr.count() - istr.offset() != bytes_before_value)
                throw;

            /// Drop the value that was inserted before the failure.
            if (value_inserted)
                columns[i]->popBack(1);

            const IDataType & type = *header.getByPosition(i).type;

            Expected expected;
            Tokens tokens(value_begin, istr.buffer().end());
            TokenIterator token_iterator(tokens);

            ASTPtr ast;
            if (!parser.parse(token_iterator, ast, expected))
                throw Exception("Cannot parse expression of type " + type.getName() + " here: " + value_snippet(),
                    ErrorCodes::SYNTAX_ERROR);

            /// Continue reading right after the parsed expression.
            istr.position() = const_cast<char *>(token_iterator->begin);

            std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(ast, *context);
            Field value = convertFieldToType(value_raw.first, type, value_raw.second.get());

            /// Check that we are indeed allowed to insert a NULL.
            if (value.isNull() && !type.isNullable())
                throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value)
                    + ", that is out of range of type " + type.getName()
                    + ", at: " + value_snippet(),
                    ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE};

            columns[i]->insert(value);

            expect_value_terminator(i);
        }
    }

    /// Rows may be separated by a comma; consume it if present.
    skipWhitespaceIfAny(istr);
    if (!istr.eof() && *istr.position() == ',')
        ++istr.position();

    return true;
}
/** Reads one row written as "(v1, v2, ..., vN)" from istr and appends one value to each column of `block`.
  *
  * As a rule, this is the usual format for streaming parsing.
  * But as an exception, processing of arbitrary expressions instead of values is also supported.
  * This is very inefficient. But if there are no expressions, then there is no overhead.
  *
  * Returns false if, after skipping whitespace, the input is exhausted or the next character is ';'.
  * Returns true once a whole row has been read; a ',' following the row is consumed as well.
  */
bool ValuesRowInputStream::read(Block & block)
{
    const size_t size = block.columns();

    skipWhitespaceIfAny(istr);

    if (istr.eof() || *istr.position() == ';')
        return false;

    ParserExpressionWithOptionalAlias parser(false);

    /// What must follow the value of column `idx`: ',' if more values follow, ')' after the last one.
    auto expect_value_terminator = [&](size_t idx)
    {
        skipWhitespaceIfAny(istr);

        if (idx + 1 == size)
            assertChar(')', istr);
        else
            assertChar(',', istr);
    };

    assertChar('(', istr);

    for (size_t i = 0; i < size; ++i)
    {
        skipWhitespaceIfAny(istr);

        /// Remember where the value starts, so that it can be re-read as an expression.
        char * const value_begin = istr.position();
        const size_t bytes_before_value = istr.count() - istr.offset();

        /// Excerpt of the input starting at the value, used in error messages.
        auto value_snippet = [&]
        {
            return String(value_begin, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - value_begin));
        };

        auto & col = block.getByPosition(i);

        /// Becomes true once the streaming parser has put a value into the column.
        bool value_inserted = false;

        try
        {
            col.type.get()->deserializeTextQuoted(*col.column.get(), istr);
            value_inserted = true;

            expect_value_terminator(i);
        }
        catch (const Exception & e)
        {
            if (!interpret_expressions)
                throw;

            /** The usual streaming parser could not parse the value.
              * Let's try to parse it with the SQL parser as a constant expression.
              * This is an exceptional case.
              */
            const auto code = e.code();
            const bool can_retry_as_expression =
                code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED
                || code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
                || code == ErrorCodes::CANNOT_PARSE_DATE
                || code == ErrorCodes::CANNOT_PARSE_DATETIME
                || code == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT;

            if (!can_retry_as_expression)
                throw;

            /// TODO Make it work when the expression does not fit entirely before the end of the buffer.

            /// If the beginning of the value is no longer in the buffer.
            if (istr.count() - istr.offset() != bytes_before_value)
                throw;

            /// Drop the value that was inserted before the failure.
            if (value_inserted)
                col.column.get()->popBack(1);

            IDataType & type = *block.safeGetByPosition(i).type;

            IParser::Pos pos = value_begin;

            Expected expected = "";
            IParser::Pos max_parsed_pos = pos;

            ASTPtr ast;
            if (!parser.parse(pos, istr.buffer().end(), ast, max_parsed_pos, expected))
                throw Exception("Cannot parse expression of type " + type.getName() + " here: " + value_snippet(),
                    ErrorCodes::SYNTAX_ERROR);

            /// Continue reading from the furthest position the parser has reached.
            istr.position() = const_cast<char *>(max_parsed_pos);

            std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(ast, context);
            Field value = convertFieldToType(value_raw.first, type, value_raw.second.get());

            if (value.isNull())
            {
                /// Check that we are indeed allowed to insert a NULL.
                bool null_is_allowed = type.isNullable();

                if (!null_is_allowed)
                {
                    /// NOTE: For now we support only one level of null values, i.e.
                    /// there are not yet such things as Array(Nullable(Array(Nullable(T))).
                    /// Therefore the code below is valid within the current limitations.
                    if (const auto * array_type = typeid_cast<const DataTypeArray *>(&type))
                        null_is_allowed = array_type->getMostNestedType()->isNullable();
                }

                if (!null_is_allowed)
                    throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value)
                        + ", that is out of range of type " + type.getName()
                        + ", at: " + value_snippet(),
                        ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE};
            }

            col.column->insert(value);

            expect_value_terminator(i);
        }
    }

    /// Rows may be separated by a comma; consume it if present.
    skipWhitespaceIfAny(istr);
    if (!istr.eof() && *istr.position() == ',')
        ++istr.position();

    return true;
}