/** Проверка на распространённый случай ошибки - использование Windows перевода строки.
  */
static void checkForCarriageReturn(ReadBuffer & istr)
{
	if (istr.position()[0] == '\r' || (istr.position() != istr.buffer().begin() && istr.position()[-1] == '\r'))
		throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row."
			"\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format."
			" You must transform your file to Unix format."
			"\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.",
			ErrorCodes::INCORRECT_DATA);
}
LimitReadBuffer::LimitReadBuffer(ReadBuffer & in, size_t limit, bool throw_exception, std::string exception_message)
    : ReadBuffer(in.position(), 0), in(in), limit(limit), throw_exception(throw_exception), exception_message(std::move(exception_message))
{
    size_t remaining_bytes_in_buffer = in.buffer().end() - in.position();
    if (remaining_bytes_in_buffer > limit)
        remaining_bytes_in_buffer = limit;

    working_buffer = Buffer(in.position(), in.position() + remaining_bytes_in_buffer);
}
Exemple #3
0
void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr) const
{
    /// Little tricky, because we cannot discriminate null from first character.

    if (istr.eof())
        throw Exception("Unexpected end of stream, while parsing value of Nullable type", ErrorCodes::CANNOT_READ_ALL_DATA);

    /// This is not null, surely.
    if (*istr.position() != '\\')
    {
        safeDeserialize(column,
            [] { return false; },
            [this, &istr] (IColumn & nested) { nested_data_type->deserializeTextEscaped(nested, istr); } );
    }
    else
    {
        /// Now we know, that data in buffer starts with backslash.
        ++istr.position();

        if (istr.eof())
            throw Exception("Unexpected end of stream, while parsing value of Nullable type, after backslash", ErrorCodes::CANNOT_READ_ALL_DATA);

        safeDeserialize(column,
            [&istr]
            {
                if (*istr.position() == 'N')
                {
                    ++istr.position();
                    return true;
                }
                return false;
            },
            [this, &istr] (IColumn & nested)
            {
                if (istr.position() != istr.buffer().begin())
                {
                    /// We could step back to consume backslash again.
                    --istr.position();
                    nested_data_type->deserializeTextEscaped(nested, istr);
                }
                else
                {
                    /// Otherwise, we need to place backslash back in front of istr.
                    ReadBufferFromMemory prefix("\\", 1);
                    ConcatReadBuffer prepended_istr(prefix, istr);

                    nested_data_type->deserializeTextEscaped(nested, prepended_istr);

                    /// Synchronise cursor position in original buffer.

                    if (prepended_istr.count() > 1)
                        istr.position() = prepended_istr.position();
                }
            });
    }
}
void throwExceptionForIncompletelyParsedValue(
    ReadBuffer & read_buffer, Block & block, size_t result)
{
    const IDataType & to_type = *block.getByPosition(result).type;

    WriteBufferFromOwnString message_buf;
    message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
        << " as " << to_type.getName()
        << ": syntax error";

    if (read_buffer.offset())
        message_buf << " at position " << read_buffer.offset()
            << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
    else
        message_buf << " at begin of string";

    if (to_type.isNumber())
        message_buf << ". Note: there are to" << to_type.getName() << "OrZero function, which returns zero instead of throwing exception.";

    throw Exception(message_buf.str(), ErrorCodes::CANNOT_PARSE_TEXT);
}
    size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum)
    {
        if (compressed_in->eof())
            return 0;

        CityHash_v1_0_2::uint128 checksum;
        compressed_in->readStrict(reinterpret_cast<char *>(&checksum), sizeof(checksum));

        own_compressed_buffer.resize(COMPRESSED_BLOCK_HEADER_SIZE);
        compressed_in->readStrict(&own_compressed_buffer[0], COMPRESSED_BLOCK_HEADER_SIZE);

        UInt8 method = own_compressed_buffer[0];    /// See CompressedWriteBuffer.h

        size_t & size_compressed = size_compressed_without_checksum;

        if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) ||
            method == static_cast<UInt8>(CompressionMethodByte::ZSTD) ||
            method == static_cast<UInt8>(CompressionMethodByte::NONE))
        {
            size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
            size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
        }
        else
            throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD);

        if (size_compressed > DBMS_MAX_COMPRESSED_SIZE)
            throw Exception("Too large size_compressed. Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);

        /// Is whole compressed block located in 'compressed_in' buffer?
        if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE &&
            compressed_in->position() + size_compressed - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end())
        {
            compressed_in->position() -= COMPRESSED_BLOCK_HEADER_SIZE;
            compressed_buffer = compressed_in->position();
            compressed_in->position() += size_compressed;
        }
        else
        {
            own_compressed_buffer.resize(size_compressed + (variant == LZ4_REFERENCE ? 0 : LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER));
            compressed_buffer = &own_compressed_buffer[0];
            compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE);
        }

        return size_compressed + sizeof(checksum);
    }
void executeQuery(
	ReadBuffer & istr,
	WriteBuffer & ostr,
	Context & context,
	BlockInputStreamPtr & query_plan,
	std::function<void(const String &)> set_content_type)
{
	PODArray<char> parse_buf;
	const char * begin;
	const char * end;

	/// If 'istr' is empty now, fetch next data into buffer.
	if (istr.buffer().size() == 0)
		istr.next();

	size_t max_query_size = context.getSettingsRef().max_query_size;

	if (istr.buffer().end() - istr.position() >= static_cast<ssize_t>(max_query_size))
	{
		/// If remaining buffer space in 'istr' is enough to parse query up to 'max_query_size' bytes, then parse inplace.
		begin = istr.position();
		end = istr.buffer().end();
		istr.position() += end - begin;
	}
	else
	{
		/// If not - copy enough data into 'parse_buf'.
		parse_buf.resize(max_query_size);
		parse_buf.resize(istr.read(&parse_buf[0], max_query_size));
		begin = &parse_buf[0];
		end = begin + parse_buf.size();
	}

	ASTPtr ast;
	BlockIO streams;

	std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete);

	try
	{
		if (streams.out)
		{
			const ASTInsertQuery * ast_insert_query = dynamic_cast<const ASTInsertQuery *>(ast.get());

			if (!ast_insert_query)
				throw Exception("Logical error: query requires data to insert, but it is not INSERT query", ErrorCodes::LOGICAL_ERROR);

			String format = ast_insert_query->format;
			if (format.empty())
				format = "Values";

			/// Data could be in parsed (ast_insert_query.data) and in not parsed yet (istr) part of query.

			ConcatReadBuffer::ReadBuffers buffers;
			ReadBuffer buf1(const_cast<char *>(ast_insert_query->data), ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0, 0);

			if (ast_insert_query->data)
				buffers.push_back(&buf1);
			buffers.push_back(&istr);

			/** NOTE Must not read from 'istr' before read all between 'ast_insert_query.data' and 'ast_insert_query.end'.
			  * - because 'query.data' could refer to memory piece, used as buffer for 'istr'.
			  */

			ConcatReadBuffer data_istr(buffers);

			BlockInputStreamPtr in{
				context.getInputFormat(
					format, data_istr, streams.out_sample, context.getSettings().max_insert_block_size)};

			copyData(*in, *streams.out);
		}

		if (streams.in)
		{
			const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());

			String format_name = ast_query_with_output && (ast_query_with_output->getFormat() != nullptr)
				? typeid_cast<const ASTIdentifier &>(*ast_query_with_output->getFormat()).name
				: context.getDefaultFormat();

			BlockOutputStreamPtr out = context.getOutputFormat(format_name, ostr, streams.in_sample);

			if (IProfilingBlockInputStream * stream = dynamic_cast<IProfilingBlockInputStream *>(streams.in.get()))
			{
				/// NOTE Progress callback takes shared ownership of 'out'.
				stream->setProgressCallback([out] (const Progress & progress) { out->onProgress(progress); });
			}

			if (set_content_type)
				set_content_type(out->getContentType());

			copyData(*streams.in, *out);
		}
	}
	catch (...)
	{
		streams.onException();
		throw;
	}

	streams.onFinish();
}
Exemple #7
0
void executeQuery(
    ReadBuffer & istr,
    WriteBuffer & ostr,
    bool allow_into_outfile,
    Context & context,
    std::function<void(const String &)> set_content_type)
{
    PODArray<char> parse_buf;
    const char * begin;
    const char * end;

    /// If 'istr' is empty now, fetch next data into buffer.
    if (istr.buffer().size() == 0)
        istr.next();

    size_t max_query_size = context.getSettingsRef().max_query_size;

    if (istr.buffer().end() - istr.position() >= static_cast<ssize_t>(max_query_size))
    {
        /// If remaining buffer space in 'istr' is enough to parse query up to 'max_query_size' bytes, then parse inplace.
        begin = istr.position();
        end = istr.buffer().end();
        istr.position() += end - begin;
    }
    else
    {
        /// If not - copy enough data into 'parse_buf'.
        parse_buf.resize(max_query_size);
        parse_buf.resize(istr.read(&parse_buf[0], max_query_size));
        begin = &parse_buf[0];
        end = begin + parse_buf.size();
    }

    ASTPtr ast;
    BlockIO streams;

    std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete);

    try
    {
        if (streams.out)
        {
            InputStreamFromASTInsertQuery in(ast, istr, streams, context);
            copyData(in, *streams.out);
        }

        if (streams.in)
        {
            const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());

            WriteBuffer * out_buf = &ostr;
            std::experimental::optional<WriteBufferFromFile> out_file_buf;
            if (ast_query_with_output && ast_query_with_output->out_file)
            {
                if (!allow_into_outfile)
                    throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED);

                const auto & out_file = typeid_cast<const ASTLiteral &>(*ast_query_with_output->out_file).value.safeGet<std::string>();
                out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
                out_buf = &out_file_buf.value();
            }

            String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
                ? typeid_cast<const ASTIdentifier &>(*ast_query_with_output->format).name
                : context.getDefaultFormat();

            BlockOutputStreamPtr out = context.getOutputFormat(format_name, *out_buf, streams.in_sample);

            if (auto stream = dynamic_cast<IProfilingBlockInputStream *>(streams.in.get()))
            {
                /// Save previous progress callback if any. TODO Do it more conveniently.
                auto previous_progress_callback = context.getProgressCallback();

                /// NOTE Progress callback takes shared ownership of 'out'.
                stream->setProgressCallback([out, previous_progress_callback] (const Progress & progress)
                {
                    if (previous_progress_callback)
                        previous_progress_callback(progress);
                    out->onProgress(progress);
                });
            }

            if (set_content_type)
                set_content_type(out->getContentType());

            copyData(*streams.in, *out);
        }
    }
    catch (...)
    {
        streams.onException();
        throw;
    }

    streams.onFinish();
}