示例#1
0
/// Copies [src, src + src_size) into dst, replacing every "%XY" triple whose two characters
/// after '%' both map to a value other than 0xFF in char_to_digit_table with the single byte
/// (X << 4) + Y. Triples that do not qualify, and a '%' with fewer than two characters after
/// it, are copied through unchanged.
///
/// The caller must provide a dst buffer of at least src_size bytes.
/// NOTE(review): the copy helper's name says it may read/write up to 15 bytes past the
/// requested range - confirm that both buffers are padded accordingly.
///
/// Returns the number of bytes written to dst.
static size_t decodeURL(const char * src, size_t src_size, char * dst)
{
	const char * const end = src + src_size;
	const char * pending = src;		/// First input byte that has not been flushed to dst yet.
	const char * cursor = src;		/// Current scan position.
	char * out = dst;

	for (;;)
	{
		cursor = find_first_symbols<'%'>(cursor, end);

		/// No more '%' in the input.
		if (cursor == end)
			break;

		/// A '%' too close to the end cannot start a full triple; the tail is copied as is below.
		if (end - cursor < 3)
		{
			cursor = end;
			break;
		}

		const unsigned char high = char_to_digit_table[static_cast<unsigned char>(cursor[1])];
		const unsigned char low = char_to_digit_table[static_cast<unsigned char>(cursor[2])];
		const bool both_are_digits = !(high == 0xFF || low == 0xFF);

		if (both_are_digits)
		{
			/// Flush the literal bytes accumulated before this triple, then emit the decoded byte.
			const size_t literal_length = cursor - pending;
			memcpySmallAllowReadWriteOverflow15(out, pending, literal_length);
			out += literal_length;

			const unsigned char decoded = (high << 4) + low;
			*out = decoded;
			++out;

			pending = cursor + 3;
		}

		/// Step over the triple whether or not it was decoded.
		cursor += 3;
	}

	/// Flush whatever literal bytes remain after the last decoded triple.
	if (pending < cursor)
	{
		const size_t tail_length = cursor - pending;
		memcpySmallAllowReadWriteOverflow15(out, pending, tail_length);
		out += tail_length;
	}

	return out - dst;
}
示例#2
0
/// Builds a new column whose i-th value is the value stored at row perm[i] of this column.
///
/// limit - 0 means "all rows"; any other value is capped by the column size.
/// Throws Exception (SIZES_OF_COLUMNS_DOESNT_MATCH) when perm has fewer entries than the
/// number of rows to produce.
ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) const
{
    const size_t rows_total = size();
    limit = (limit == 0) ? rows_total : std::min(rows_total, limit);

    if (limit > perm.size())
        throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    /// Nothing to copy: hand back an empty column with the same N.
    if (limit == 0)
        return ColumnFixedString::create(n);

    auto res = ColumnFixedString::create(n);
    Chars_t & out_chars = res->chars;
    out_chars.resize(n * limit);

    /// Every value occupies exactly n bytes, so row r lives at byte offset r * n.
    for (size_t row = 0; row < limit; ++row)
        memcpySmallAllowReadWriteOverflow15(&out_chars[row * n], &chars[perm[row] * n], n);

    return std::move(res);
}
示例#3
0
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
{
    const ColumnFixedString & src = static_cast<const ColumnFixedString &>(src_);

    if (n != src.getN())
        throw Exception("Size of FixedString doesn't match", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH);

    size_t old_size = chars.size();
    chars.resize(old_size + n);
    memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[n * index], n);
}
示例#4
0
/// Builds a new column whose i-th string is the string stored at row perm[i] of this column.
///
/// limit - 0 means "all rows"; any other value is capped by the number of rows.
/// Throws Exception (SIZES_OF_COLUMNS_DOESNT_MATCH) when perm has fewer entries than the
/// number of rows to produce.
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
{
    const size_t rows_total = offsets.size();
    limit = (limit == 0) ? rows_total : std::min(rows_total, limit);

    if (limit > perm.size())
        throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (limit == 0)
        return std::make_shared<ColumnString>();

    std::shared_ptr<ColumnString> res = std::make_shared<ColumnString>();

    Chars_t & out_chars = res->chars;
    Offsets_t & out_offsets = res->offsets;

    /// When every row is taken the result is sized like the source byte array;
    /// otherwise add up the sizes of the selected strings.
    size_t out_bytes = chars.size();
    if (limit != rows_total)
    {
        out_bytes = 0;
        for (size_t i = 0; i < limit; ++i)
            out_bytes += sizeAt(perm[i]);
    }

    out_chars.resize(out_bytes);
    out_offsets.resize(limit);

    Offset_t write_pos = 0;

    for (size_t i = 0; i < limit; ++i)
    {
        const size_t row = perm[i];

        /// offsets[row] is the end of string `row`; its start is the end of the previous one.
        const size_t begin = row == 0 ? 0 : offsets[row - 1];
        const size_t length = offsets[row] - begin;

        memcpySmallAllowReadWriteOverflow15(&out_chars[write_pos], &chars[begin], length);

        write_pos += length;
        out_offsets[i] = write_pos;
    }

    return res;
}
/// Gathers rows by index: row i of the result is row indexes[i] of this column, for i in [0, limit).
/// A zero `limit` yields an empty column with the same N.
/// NOTE(review): `Type` is not declared in the visible lines - presumably a template parameter
/// whose header sits above this definition. Neither `limit` nor the index values are
/// range-checked here - confirm the caller validates them.
ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
    auto res = ColumnFixedString::create(n);

    if (limit != 0)
    {
        Chars & out_chars = res->chars;
        out_chars.resize(n * limit);

        /// Every value occupies exactly n bytes, so row r lives at byte offset r * n.
        for (size_t row = 0; row < limit; ++row)
            memcpySmallAllowReadWriteOverflow15(&out_chars[row * n], &chars[indexes[row] * n], n);
    }

    return res;
}
示例#6
0
/// Builds a new column in which string i of this column is repeated
/// replicate_offsets[i] - replicate_offsets[i - 1] times (replicate_offsets[-1] taken as 0).
///
/// Throws Exception (SIZES_OF_COLUMNS_DOESNT_MATCH) when replicate_offsets does not have
/// exactly one entry per row.
ColumnPtr ColumnString::replicate(const Offsets_t & replicate_offsets) const
{
    const size_t rows = size();
    if (replicate_offsets.size() != rows)
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    std::shared_ptr<ColumnString> res = std::make_shared<ColumnString>();

    if (rows == 0)
        return res;

    Chars_t & out_chars = res->chars;
    Offsets_t & out_offsets = res->offsets;

    /// Pre-size using the average string length times the total number of result rows.
    out_chars.reserve(chars.size() / rows * replicate_offsets.back());
    out_offsets.reserve(replicate_offsets.back());

    Offset_t copies_done = 0;   /// replicate_offsets value of the previous row.
    Offset_t src_begin = 0;     /// Byte offset where the current source string starts.
    Offset_t out_end = 0;       /// Running end offset in the result.

    for (size_t row = 0; row < rows; ++row)
    {
        const size_t copies = replicate_offsets[row] - copies_done;
        const size_t length = offsets[row] - src_begin;

        for (size_t copy = 0; copy < copies; ++copy)
        {
            out_end += length;
            out_offsets.push_back(out_end);

            /// Append `length` bytes: grow first, then copy into the freshly added tail.
            const size_t write_pos = out_chars.size();
            out_chars.resize(write_pos + length);
            memcpySmallAllowReadWriteOverflow15(&out_chars[write_pos], &chars[src_begin], length);
        }

        copies_done = replicate_offsets[row];
        src_begin = offsets[row];
    }

    return res;
}
示例#7
0
/// Builds a new column in which value i of this column is repeated
/// offsets[i] - offsets[i - 1] times (offsets[-1] taken as 0).
///
/// Throws Exception (SIZES_OF_COLUMNS_DOESNT_MATCH) when offsets does not have exactly
/// one entry per row.
ColumnPtr ColumnFixedString::replicate(const Offsets & offsets) const
{
    const size_t rows = size();
    if (offsets.size() != rows)
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = ColumnFixedString::create(n);

    if (rows == 0)
        return std::move(res);

    /// The last offset is the total number of result rows; each takes n bytes.
    Chars_t & out_chars = res->chars;
    out_chars.resize(n * offsets.back());

    Offset out_row = 0;
    for (size_t row = 0; row < rows; ++row)
    {
        /// Fill result rows [out_row, offsets[row]) with copies of source row `row`.
        const size_t row_end = offsets[row];
        while (out_row < row_end)
        {
            memcpySmallAllowReadWriteOverflow15(&out_chars[out_row * n], &chars[row * n], n);
            ++out_row;
        }
    }

    return std::move(res);
}
示例#8
0
/// Returns a new column that keeps only the rows whose filter byte is non-zero.
///
/// filt             - one byte per row; must have exactly size() entries, otherwise an
///                    Exception (SIZES_OF_COLUMNS_DOESNT_MATCH) is thrown.
/// result_size_hint - 0: reserve nothing up front; positive: reserve room for that many rows;
///                    negative: reserve as many bytes as this column currently holds.
ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
{
    size_t col_size = size();
    if (col_size != filt.size())
        throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = ColumnFixedString::create(n);

    if (result_size_hint)
        res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size());

    const UInt8 * filt_pos = &filt[0];
    const UInt8 * filt_end = filt_pos + col_size;
    const UInt8 * data_pos = &chars[0];

#if __SSE2__
    /** A slightly more optimized version.
        * Based on the assumption that often pieces of consecutive values
        *  completely pass or do not pass the filter.
        * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
        */

    static constexpr size_t SIMD_BYTES = 16;
    const __m128i zero16 = _mm_setzero_si128();
    const UInt8 * filt_end_sse = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES;
    const size_t chars_per_simd_elements = SIMD_BYTES * n;

    while (filt_pos < filt_end_sse)
    {
        /// Bit i of `mask` is set iff filter byte i is non-zero.
        /// The mask is built as "not equal to zero" rather than with the signed
        /// "greater than zero" comparison: the latter treats bytes 0x80..0xFF as negative,
        /// i.e. as not passing, which disagrees with the per-row `if (filt_pos[i])` checks below.
        const int zero_mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
        const int mask = ~zero_mask & 0xFFFF;

        if (0 == mask)
        {
            /// Nothing is inserted.
            data_pos += chars_per_simd_elements;
        }
        else if (0xFFFF == mask)
        {
            /// All SIMD_BYTES rows pass: append them in one go.
            res->chars.insert(data_pos, data_pos + chars_per_simd_elements);
            data_pos += chars_per_simd_elements;
        }
        else
        {
            /// Mixed group: fall back to row-by-row copying.
            size_t res_chars_size = res->chars.size();
            for (size_t i = 0; i < SIMD_BYTES; ++i)
            {
                if (filt_pos[i])
                {
                    res->chars.resize(res_chars_size + n);
                    memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
                    res_chars_size += n;
                }
                data_pos += n;
            }
        }

        filt_pos += SIMD_BYTES;
    }
#endif

    /// Scalar pass: the remaining tail after the SIMD loop, or every row when SSE2 is unavailable.
    size_t res_chars_size = res->chars.size();
    while (filt_pos < filt_end)
    {
        if (*filt_pos)
        {
            res->chars.resize(res_chars_size + n);
            memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
            res_chars_size += n;
        }

        ++filt_pos;
        data_pos += n;
    }

    return std::move(res);
}
示例#9
0
/// Replicates an array-of-strings column: array i of this column is repeated
/// replicate_offsets[i] - replicate_offsets[i - 1] times (replicate_offsets[-1] taken as 0).
///
/// The nested column is cast to ColumnString with typeid_cast.
/// Throws Exception (SIZES_OF_COLUMNS_DOESNT_MATCH) when replicate_offsets does not have
/// exactly one entry per row.
ColumnPtr ColumnArray::replicateString(const Offsets_t & replicate_offsets) const
{
	size_t col_size = size();
	if (col_size != replicate_offsets.size())
		throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

	ColumnPtr res = cloneEmpty();

	if (0 == col_size)
		return res;

	ColumnArray & res_ = static_cast<ColumnArray &>(*res);

	const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
	const ColumnString::Chars_t & src_chars = src_string.getChars();
	const Offsets_t & src_string_offsets = src_string.getOffsets();
	const Offsets_t & src_offsets = getOffsets();

	ColumnString::Chars_t & res_chars = typeid_cast<ColumnString &>(res_.getData()).getChars();
	Offsets_t & res_string_offsets = typeid_cast<ColumnString &>(res_.getData()).getOffsets();
	Offsets_t & res_offsets = res_.getOffsets();

	/// Pre-size the outputs using per-row averages times the total number of result rows.
	res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back());
	res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back());
	res_offsets.reserve(replicate_offsets.back());

	Offset_t prev_replicate_offset = 0;

	Offset_t prev_src_offset = 0;
	Offset_t prev_src_string_offset = 0;

	Offset_t current_res_offset = 0;
	Offset_t current_res_string_offset = 0;

	for (size_t i = 0; i < col_size; ++i)
	{
		/// How many times to replicate the array.
		size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset;
		/// The number of strings in the array.
		size_t value_size = src_offsets[i] - prev_src_offset;
		/// The number of characters in the strings of the array, including the zero bytes.
		size_t sum_chars_size = value_size == 0 ? 0 : (src_string_offsets[prev_src_offset + value_size - 1] - prev_src_string_offset);

		for (size_t j = 0; j < size_to_replicate; ++j)
		{
			current_res_offset += value_size;
			res_offsets.push_back(current_res_offset);

			size_t prev_src_string_offset_local = prev_src_string_offset;
			for (size_t k = 0; k < value_size; ++k)
			{
				/// Size of one string.
				size_t chars_size = src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local;

				current_res_string_offset += chars_size;
				res_string_offsets.push_back(current_res_string_offset);

				prev_src_string_offset_local += chars_size;
			}

			/// Skip the copy for an empty array: with nothing to copy, the addresses below would
			/// index at or past the end of the buffers (which may not even be allocated yet).
			if (sum_chars_size)
			{
				/// Copy the characters of the array of strings.
				res_chars.resize(res_chars.size() + sum_chars_size);
				memcpySmallAllowReadWriteOverflow15(
					&res_chars[res_chars.size() - sum_chars_size], &src_chars[prev_src_string_offset], sum_chars_size);
			}
		}

		prev_replicate_offset = replicate_offsets[i];
		prev_src_offset = src_offsets[i];
		prev_src_string_offset += sum_chars_size;
	}

	return res;
}
示例#10
0
/// Replicates an array-of-strings column: array i of this column is repeated
/// replicate_offsets[i] - replicate_offsets[i - 1] times (replicate_offsets[-1] taken as 0).
///
/// The nested column is cast to ColumnString with typeid_cast.
/// Throws Exception (SIZES_OF_COLUMNS_DOESNT_MATCH) when replicate_offsets does not have
/// exactly one entry per row.
ColumnPtr ColumnArray::replicateString(const Offsets & replicate_offsets) const
{
    const size_t rows = size();
    if (replicate_offsets.size() != rows)
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    MutableColumnPtr res = cloneEmpty();

    if (rows == 0)
        return res;

    ColumnArray & res_array = static_cast<ColumnArray &>(*res);

    /// Source side: the nested string column and the array boundaries over it.
    const ColumnString & src_strings = typeid_cast<const ColumnString &>(*data);
    const ColumnString::Chars & src_chars = src_strings.getChars();
    const Offsets & src_string_offsets = src_strings.getOffsets();
    const Offsets & src_array_offsets = getOffsets();

    /// Result side: the same three pieces of the freshly cloned empty column.
    ColumnString & res_strings = typeid_cast<ColumnString &>(res_array.getData());
    ColumnString::Chars & res_chars = res_strings.getChars();
    Offsets & res_string_offsets = res_strings.getOffsets();
    Offsets & res_array_offsets = res_array.getOffsets();

    /// Pre-size the outputs using per-row averages times the total number of result rows.
    res_chars.reserve(src_chars.size() / rows * replicate_offsets.back());
    res_string_offsets.reserve(src_string_offsets.size() / rows * replicate_offsets.back());
    res_array_offsets.reserve(replicate_offsets.back());

    Offset replicated_so_far = 0;   /// replicate_offsets value of the previous row.
    Offset src_array_begin = 0;     /// Index of the first string of the current source array.
    Offset src_chars_begin = 0;     /// Byte offset of the first string of the current source array.
    Offset res_array_end = 0;       /// Running end offset of arrays in the result.
    Offset res_chars_end = 0;       /// Running end offset of strings in the result.

    for (size_t row = 0; row < rows; ++row)
    {
        /// How many copies of this array to emit.
        const size_t copies = replicate_offsets[row] - replicated_so_far;
        /// Number of strings in this array.
        const size_t strings_in_array = src_array_offsets[row] - src_array_begin;
        /// Total bytes occupied by the strings of this array, including the zero bytes.
        size_t chars_in_array = 0;
        if (strings_in_array != 0)
            chars_in_array = src_string_offsets[src_array_begin + strings_in_array - 1] - src_chars_begin;

        for (size_t copy = 0; copy < copies; ++copy)
        {
            res_array_end += strings_in_array;
            res_array_offsets.push_back(res_array_end);

            /// Re-emit the per-string offsets, shifted to the result's running position.
            size_t string_begin = src_chars_begin;
            for (size_t k = 0; k < strings_in_array; ++k)
            {
                const size_t string_end = src_string_offsets[src_array_begin + k];

                res_chars_end += string_end - string_begin;
                res_string_offsets.push_back(res_chars_end);

                string_begin = string_end;
            }

            /// Copy all characters of the array in one go; skipped when there is nothing to copy.
            if (chars_in_array)
            {
                const size_t write_pos = res_chars.size();
                res_chars.resize(write_pos + chars_in_array);
                memcpySmallAllowReadWriteOverflow15(&res_chars[write_pos], &src_chars[src_chars_begin], chars_in_array);
            }
        }

        replicated_so_far = replicate_offsets[row];
        src_array_begin = src_array_offsets[row];
        src_chars_begin += chars_in_array;
    }

    return res;
}